ARMLoadStoreOptimizer.cpp 85 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447
  1. //===- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. /// \file This file contains a pass that performs load / store related peephole
  11. /// optimizations. This pass should be run after register allocation.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "ARM.h"
  15. #include "ARMBaseInstrInfo.h"
  16. #include "ARMBaseRegisterInfo.h"
  17. #include "ARMISelLowering.h"
  18. #include "ARMMachineFunctionInfo.h"
  19. #include "ARMSubtarget.h"
  20. #include "MCTargetDesc/ARMAddressingModes.h"
  21. #include "MCTargetDesc/ARMBaseInfo.h"
  22. #include "Utils/ARMBaseInfo.h"
  23. #include "llvm/ADT/ArrayRef.h"
  24. #include "llvm/ADT/DenseMap.h"
  25. #include "llvm/ADT/DenseSet.h"
  26. #include "llvm/ADT/STLExtras.h"
  27. #include "llvm/ADT/SmallPtrSet.h"
  28. #include "llvm/ADT/SmallSet.h"
  29. #include "llvm/ADT/SmallVector.h"
  30. #include "llvm/ADT/Statistic.h"
  31. #include "llvm/ADT/iterator_range.h"
  32. #include "llvm/Analysis/AliasAnalysis.h"
  33. #include "llvm/CodeGen/LivePhysRegs.h"
  34. #include "llvm/CodeGen/MachineBasicBlock.h"
  35. #include "llvm/CodeGen/MachineFunction.h"
  36. #include "llvm/CodeGen/MachineFunctionPass.h"
  37. #include "llvm/CodeGen/MachineInstr.h"
  38. #include "llvm/CodeGen/MachineInstrBuilder.h"
  39. #include "llvm/CodeGen/MachineMemOperand.h"
  40. #include "llvm/CodeGen/MachineOperand.h"
  41. #include "llvm/CodeGen/MachineRegisterInfo.h"
  42. #include "llvm/CodeGen/RegisterClassInfo.h"
  43. #include "llvm/CodeGen/TargetFrameLowering.h"
  44. #include "llvm/CodeGen/TargetInstrInfo.h"
  45. #include "llvm/CodeGen/TargetLowering.h"
  46. #include "llvm/CodeGen/TargetRegisterInfo.h"
  47. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  48. #include "llvm/IR/DataLayout.h"
  49. #include "llvm/IR/DebugLoc.h"
  50. #include "llvm/IR/DerivedTypes.h"
  51. #include "llvm/IR/Function.h"
  52. #include "llvm/IR/Type.h"
  53. #include "llvm/MC/MCInstrDesc.h"
  54. #include "llvm/Pass.h"
  55. #include "llvm/Support/Allocator.h"
  56. #include "llvm/Support/CommandLine.h"
  57. #include "llvm/Support/Debug.h"
  58. #include "llvm/Support/ErrorHandling.h"
  59. #include "llvm/Support/raw_ostream.h"
  60. #include <algorithm>
  61. #include <cassert>
  62. #include <cstddef>
  63. #include <cstdlib>
  64. #include <iterator>
  65. #include <limits>
  66. #include <utility>
  67. using namespace llvm;
  68. #define DEBUG_TYPE "arm-ldst-opt"
  69. STATISTIC(NumLDMGened , "Number of ldm instructions generated");
  70. STATISTIC(NumSTMGened , "Number of stm instructions generated");
  71. STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
  72. STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
  73. STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
  74. STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
  75. STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
  76. STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
  77. STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
  78. STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
  79. STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
  80. /// This switch disables formation of double/multi instructions that could
  81. /// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP
  82. /// disabled. This can be used to create libraries that are robust even when
  83. /// users provoke undefined behaviour by supplying misaligned pointers.
  84. /// \see mayCombineMisaligned()
  85. static cl::opt<bool>
  86. AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden,
  87. cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));
  88. #define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
  89. namespace {
  90. /// Post- register allocation pass the combine load / store instructions to
  91. /// form ldm / stm instructions.
  92. struct ARMLoadStoreOpt : public MachineFunctionPass {
  93. static char ID;
  94. const MachineFunction *MF;
  95. const TargetInstrInfo *TII;
  96. const TargetRegisterInfo *TRI;
  97. const ARMSubtarget *STI;
  98. const TargetLowering *TL;
  99. ARMFunctionInfo *AFI;
  100. LivePhysRegs LiveRegs;
  101. RegisterClassInfo RegClassInfo;
  102. MachineBasicBlock::const_iterator LiveRegPos;
  103. bool LiveRegsValid;
  104. bool RegClassInfoValid;
  105. bool isThumb1, isThumb2;
  106. ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
  107. bool runOnMachineFunction(MachineFunction &Fn) override;
  108. MachineFunctionProperties getRequiredProperties() const override {
  109. return MachineFunctionProperties().set(
  110. MachineFunctionProperties::Property::NoVRegs);
  111. }
  112. StringRef getPassName() const override { return ARM_LOAD_STORE_OPT_NAME; }
  113. private:
  114. /// A set of load/store MachineInstrs with same base register sorted by
  115. /// offset.
  116. struct MemOpQueueEntry {
  117. MachineInstr *MI;
  118. int Offset; ///< Load/Store offset.
  119. unsigned Position; ///< Position as counted from end of basic block.
  120. MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position)
  121. : MI(&MI), Offset(Offset), Position(Position) {}
  122. };
  123. using MemOpQueue = SmallVector<MemOpQueueEntry, 8>;
  124. /// A set of MachineInstrs that fulfill (nearly all) conditions to get
  125. /// merged into a LDM/STM.
  126. struct MergeCandidate {
  127. /// List of instructions ordered by load/store offset.
  128. SmallVector<MachineInstr*, 4> Instrs;
  129. /// Index in Instrs of the instruction being latest in the schedule.
  130. unsigned LatestMIIdx;
  131. /// Index in Instrs of the instruction being earliest in the schedule.
  132. unsigned EarliestMIIdx;
  133. /// Index into the basic block where the merged instruction will be
  134. /// inserted. (See MemOpQueueEntry.Position)
  135. unsigned InsertPos;
  136. /// Whether the instructions can be merged into a ldm/stm instruction.
  137. bool CanMergeToLSMulti;
  138. /// Whether the instructions can be merged into a ldrd/strd instruction.
  139. bool CanMergeToLSDouble;
  140. };
  141. SpecificBumpPtrAllocator<MergeCandidate> Allocator;
  142. SmallVector<const MergeCandidate*,4> Candidates;
  143. SmallVector<MachineInstr*,4> MergeBaseCandidates;
  144. void moveLiveRegsBefore(const MachineBasicBlock &MBB,
  145. MachineBasicBlock::const_iterator Before);
  146. unsigned findFreeReg(const TargetRegisterClass &RegClass);
  147. void UpdateBaseRegUses(MachineBasicBlock &MBB,
  148. MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
  149. unsigned Base, unsigned WordOffset,
  150. ARMCC::CondCodes Pred, unsigned PredReg);
  151. MachineInstr *CreateLoadStoreMulti(
  152. MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
  153. int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
  154. ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
  155. ArrayRef<std::pair<unsigned, bool>> Regs);
  156. MachineInstr *CreateLoadStoreDouble(
  157. MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
  158. int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
  159. ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
  160. ArrayRef<std::pair<unsigned, bool>> Regs) const;
  161. void FormCandidates(const MemOpQueue &MemOps);
  162. MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
  163. bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
  164. MachineBasicBlock::iterator &MBBI);
  165. bool MergeBaseUpdateLoadStore(MachineInstr *MI);
  166. bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
  167. bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
  168. bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
  169. bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  170. bool CombineMovBx(MachineBasicBlock &MBB);
  171. };
  172. } // end anonymous namespace
  173. char ARMLoadStoreOpt::ID = 0;
  174. INITIALIZE_PASS(ARMLoadStoreOpt, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME, false,
  175. false)
  176. static bool definesCPSR(const MachineInstr &MI) {
  177. for (const auto &MO : MI.operands()) {
  178. if (!MO.isReg())
  179. continue;
  180. if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
  181. // If the instruction has live CPSR def, then it's not safe to fold it
  182. // into load / store.
  183. return true;
  184. }
  185. return false;
  186. }
  187. static int getMemoryOpOffset(const MachineInstr &MI) {
  188. unsigned Opcode = MI.getOpcode();
  189. bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  190. unsigned NumOperands = MI.getDesc().getNumOperands();
  191. unsigned OffField = MI.getOperand(NumOperands - 3).getImm();
  192. if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
  193. Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
  194. Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
  195. Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
  196. return OffField;
  197. // Thumb1 immediate offsets are scaled by 4
  198. if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
  199. Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
  200. return OffField * 4;
  201. int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
  202. : ARM_AM::getAM5Offset(OffField) * 4;
  203. ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
  204. : ARM_AM::getAM5Op(OffField);
  205. if (Op == ARM_AM::sub)
  206. return -Offset;
  207. return Offset;
  208. }
  209. static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  210. return MI.getOperand(1);
  211. }
  212. static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  213. return MI.getOperand(0);
  214. }
  215. static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  216. switch (Opcode) {
  217. default: llvm_unreachable("Unhandled opcode!");
  218. case ARM::LDRi12:
  219. ++NumLDMGened;
  220. switch (Mode) {
  221. default: llvm_unreachable("Unhandled submode!");
  222. case ARM_AM::ia: return ARM::LDMIA;
  223. case ARM_AM::da: return ARM::LDMDA;
  224. case ARM_AM::db: return ARM::LDMDB;
  225. case ARM_AM::ib: return ARM::LDMIB;
  226. }
  227. case ARM::STRi12:
  228. ++NumSTMGened;
  229. switch (Mode) {
  230. default: llvm_unreachable("Unhandled submode!");
  231. case ARM_AM::ia: return ARM::STMIA;
  232. case ARM_AM::da: return ARM::STMDA;
  233. case ARM_AM::db: return ARM::STMDB;
  234. case ARM_AM::ib: return ARM::STMIB;
  235. }
  236. case ARM::tLDRi:
  237. case ARM::tLDRspi:
  238. // tLDMIA is writeback-only - unless the base register is in the input
  239. // reglist.
  240. ++NumLDMGened;
  241. switch (Mode) {
  242. default: llvm_unreachable("Unhandled submode!");
  243. case ARM_AM::ia: return ARM::tLDMIA;
  244. }
  245. case ARM::tSTRi:
  246. case ARM::tSTRspi:
  247. // There is no non-writeback tSTMIA either.
  248. ++NumSTMGened;
  249. switch (Mode) {
  250. default: llvm_unreachable("Unhandled submode!");
  251. case ARM_AM::ia: return ARM::tSTMIA_UPD;
  252. }
  253. case ARM::t2LDRi8:
  254. case ARM::t2LDRi12:
  255. ++NumLDMGened;
  256. switch (Mode) {
  257. default: llvm_unreachable("Unhandled submode!");
  258. case ARM_AM::ia: return ARM::t2LDMIA;
  259. case ARM_AM::db: return ARM::t2LDMDB;
  260. }
  261. case ARM::t2STRi8:
  262. case ARM::t2STRi12:
  263. ++NumSTMGened;
  264. switch (Mode) {
  265. default: llvm_unreachable("Unhandled submode!");
  266. case ARM_AM::ia: return ARM::t2STMIA;
  267. case ARM_AM::db: return ARM::t2STMDB;
  268. }
  269. case ARM::VLDRS:
  270. ++NumVLDMGened;
  271. switch (Mode) {
  272. default: llvm_unreachable("Unhandled submode!");
  273. case ARM_AM::ia: return ARM::VLDMSIA;
  274. case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
  275. }
  276. case ARM::VSTRS:
  277. ++NumVSTMGened;
  278. switch (Mode) {
  279. default: llvm_unreachable("Unhandled submode!");
  280. case ARM_AM::ia: return ARM::VSTMSIA;
  281. case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
  282. }
  283. case ARM::VLDRD:
  284. ++NumVLDMGened;
  285. switch (Mode) {
  286. default: llvm_unreachable("Unhandled submode!");
  287. case ARM_AM::ia: return ARM::VLDMDIA;
  288. case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
  289. }
  290. case ARM::VSTRD:
  291. ++NumVSTMGened;
  292. switch (Mode) {
  293. default: llvm_unreachable("Unhandled submode!");
  294. case ARM_AM::ia: return ARM::VSTMDIA;
  295. case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
  296. }
  297. }
  298. }
  299. static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  300. switch (Opcode) {
  301. default: llvm_unreachable("Unhandled opcode!");
  302. case ARM::LDMIA_RET:
  303. case ARM::LDMIA:
  304. case ARM::LDMIA_UPD:
  305. case ARM::STMIA:
  306. case ARM::STMIA_UPD:
  307. case ARM::tLDMIA:
  308. case ARM::tLDMIA_UPD:
  309. case ARM::tSTMIA_UPD:
  310. case ARM::t2LDMIA_RET:
  311. case ARM::t2LDMIA:
  312. case ARM::t2LDMIA_UPD:
  313. case ARM::t2STMIA:
  314. case ARM::t2STMIA_UPD:
  315. case ARM::VLDMSIA:
  316. case ARM::VLDMSIA_UPD:
  317. case ARM::VSTMSIA:
  318. case ARM::VSTMSIA_UPD:
  319. case ARM::VLDMDIA:
  320. case ARM::VLDMDIA_UPD:
  321. case ARM::VSTMDIA:
  322. case ARM::VSTMDIA_UPD:
  323. return ARM_AM::ia;
  324. case ARM::LDMDA:
  325. case ARM::LDMDA_UPD:
  326. case ARM::STMDA:
  327. case ARM::STMDA_UPD:
  328. return ARM_AM::da;
  329. case ARM::LDMDB:
  330. case ARM::LDMDB_UPD:
  331. case ARM::STMDB:
  332. case ARM::STMDB_UPD:
  333. case ARM::t2LDMDB:
  334. case ARM::t2LDMDB_UPD:
  335. case ARM::t2STMDB:
  336. case ARM::t2STMDB_UPD:
  337. case ARM::VLDMSDB_UPD:
  338. case ARM::VSTMSDB_UPD:
  339. case ARM::VLDMDDB_UPD:
  340. case ARM::VSTMDDB_UPD:
  341. return ARM_AM::db;
  342. case ARM::LDMIB:
  343. case ARM::LDMIB_UPD:
  344. case ARM::STMIB:
  345. case ARM::STMIB_UPD:
  346. return ARM_AM::ib;
  347. }
  348. }
  349. static bool isT1i32Load(unsigned Opc) {
  350. return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
  351. }
  352. static bool isT2i32Load(unsigned Opc) {
  353. return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
  354. }
  355. static bool isi32Load(unsigned Opc) {
  356. return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
  357. }
  358. static bool isT1i32Store(unsigned Opc) {
  359. return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
  360. }
  361. static bool isT2i32Store(unsigned Opc) {
  362. return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
  363. }
  364. static bool isi32Store(unsigned Opc) {
  365. return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
  366. }
  367. static bool isLoadSingle(unsigned Opc) {
  368. return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
  369. }
  370. static unsigned getImmScale(unsigned Opc) {
  371. switch (Opc) {
  372. default: llvm_unreachable("Unhandled opcode!");
  373. case ARM::tLDRi:
  374. case ARM::tSTRi:
  375. case ARM::tLDRspi:
  376. case ARM::tSTRspi:
  377. return 1;
  378. case ARM::tLDRHi:
  379. case ARM::tSTRHi:
  380. return 2;
  381. case ARM::tLDRBi:
  382. case ARM::tSTRBi:
  383. return 4;
  384. }
  385. }
  386. static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  387. switch (MI->getOpcode()) {
  388. default: return 0;
  389. case ARM::LDRi12:
  390. case ARM::STRi12:
  391. case ARM::tLDRi:
  392. case ARM::tSTRi:
  393. case ARM::tLDRspi:
  394. case ARM::tSTRspi:
  395. case ARM::t2LDRi8:
  396. case ARM::t2LDRi12:
  397. case ARM::t2STRi8:
  398. case ARM::t2STRi12:
  399. case ARM::VLDRS:
  400. case ARM::VSTRS:
  401. return 4;
  402. case ARM::VLDRD:
  403. case ARM::VSTRD:
  404. return 8;
  405. case ARM::LDMIA:
  406. case ARM::LDMDA:
  407. case ARM::LDMDB:
  408. case ARM::LDMIB:
  409. case ARM::STMIA:
  410. case ARM::STMDA:
  411. case ARM::STMDB:
  412. case ARM::STMIB:
  413. case ARM::tLDMIA:
  414. case ARM::tLDMIA_UPD:
  415. case ARM::tSTMIA_UPD:
  416. case ARM::t2LDMIA:
  417. case ARM::t2LDMDB:
  418. case ARM::t2STMIA:
  419. case ARM::t2STMDB:
  420. case ARM::VLDMSIA:
  421. case ARM::VSTMSIA:
  422. return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  423. case ARM::VLDMDIA:
  424. case ARM::VSTMDIA:
  425. return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  426. }
  427. }
  428. /// Update future uses of the base register with the offset introduced
  429. /// due to writeback. This function only works on Thumb1.
  430. void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
  431. MachineBasicBlock::iterator MBBI,
  432. const DebugLoc &DL, unsigned Base,
  433. unsigned WordOffset,
  434. ARMCC::CondCodes Pred,
  435. unsigned PredReg) {
  436. assert(isThumb1 && "Can only update base register uses for Thumb1!");
  437. // Start updating any instructions with immediate offsets. Insert a SUB before
  438. // the first non-updateable instruction (if any).
  439. for (; MBBI != MBB.end(); ++MBBI) {
  440. bool InsertSub = false;
  441. unsigned Opc = MBBI->getOpcode();
  442. if (MBBI->readsRegister(Base)) {
  443. int Offset;
  444. bool IsLoad =
  445. Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
  446. bool IsStore =
  447. Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
  448. if (IsLoad || IsStore) {
  449. // Loads and stores with immediate offsets can be updated, but only if
  450. // the new offset isn't negative.
  451. // The MachineOperand containing the offset immediate is the last one
  452. // before predicates.
  453. MachineOperand &MO =
  454. MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
  455. // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
  456. Offset = MO.getImm() - WordOffset * getImmScale(Opc);
  457. // If storing the base register, it needs to be reset first.
  458. unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
  459. if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
  460. MO.setImm(Offset);
  461. else
  462. InsertSub = true;
  463. } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
  464. !definesCPSR(*MBBI)) {
  465. // SUBS/ADDS using this register, with a dead def of the CPSR.
  466. // Merge it with the update; if the merged offset is too large,
  467. // insert a new sub instead.
  468. MachineOperand &MO =
  469. MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
  470. Offset = (Opc == ARM::tSUBi8) ?
  471. MO.getImm() + WordOffset * 4 :
  472. MO.getImm() - WordOffset * 4 ;
  473. if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
  474. // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
  475. // Offset == 0.
  476. MO.setImm(Offset);
  477. // The base register has now been reset, so exit early.
  478. return;
  479. } else {
  480. InsertSub = true;
  481. }
  482. } else {
  483. // Can't update the instruction.
  484. InsertSub = true;
  485. }
  486. } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) {
  487. // Since SUBS sets the condition flags, we can't place the base reset
  488. // after an instruction that has a live CPSR def.
  489. // The base register might also contain an argument for a function call.
  490. InsertSub = true;
  491. }
  492. if (InsertSub) {
  493. // An instruction above couldn't be updated, so insert a sub.
  494. BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
  495. .add(t1CondCodeOp(true))
  496. .addReg(Base)
  497. .addImm(WordOffset * 4)
  498. .addImm(Pred)
  499. .addReg(PredReg);
  500. return;
  501. }
  502. if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
  503. // Register got killed. Stop updating.
  504. return;
  505. }
  506. // End of block was reached.
  507. if (MBB.succ_size() > 0) {
  508. // FIXME: Because of a bug, live registers are sometimes missing from
  509. // the successor blocks' live-in sets. This means we can't trust that
  510. // information and *always* have to reset at the end of a block.
  511. // See PR21029.
  512. if (MBBI != MBB.end()) --MBBI;
  513. BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
  514. .add(t1CondCodeOp(true))
  515. .addReg(Base)
  516. .addImm(WordOffset * 4)
  517. .addImm(Pred)
  518. .addReg(PredReg);
  519. }
  520. }
  521. /// Return the first register of class \p RegClass that is not in \p Regs.
  522. unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
  523. if (!RegClassInfoValid) {
  524. RegClassInfo.runOnMachineFunction(*MF);
  525. RegClassInfoValid = true;
  526. }
  527. for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
  528. if (!LiveRegs.contains(Reg))
  529. return Reg;
  530. return 0;
  531. }
  532. /// Compute live registers just before instruction \p Before (in normal schedule
  533. /// direction). Computes backwards so multiple queries in the same block must
  534. /// come in reverse order.
  535. void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
  536. MachineBasicBlock::const_iterator Before) {
  537. // Initialize if we never queried in this block.
  538. if (!LiveRegsValid) {
  539. LiveRegs.init(*TRI);
  540. LiveRegs.addLiveOuts(MBB);
  541. LiveRegPos = MBB.end();
  542. LiveRegsValid = true;
  543. }
  544. // Move backward just before the "Before" position.
  545. while (LiveRegPos != Before) {
  546. --LiveRegPos;
  547. LiveRegs.stepBackward(*LiveRegPos);
  548. }
  549. }
  550. static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
  551. unsigned Reg) {
  552. for (const std::pair<unsigned, bool> &R : Regs)
  553. if (R.first == Reg)
  554. return true;
  555. return false;
  556. }
  557. /// Create and insert a LDM or STM with Base as base register and registers in
  558. /// Regs as the register operands that would be loaded / stored. It returns
  559. /// nullptr if the transformation is not done.
  560. MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
  561. MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
  562. int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
  563. ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
  564. ArrayRef<std::pair<unsigned, bool>> Regs) {
  565. unsigned NumRegs = Regs.size();
  566. assert(NumRegs > 1);
  567. // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  568. // Compute liveness information for that register to make the decision.
  569. bool SafeToClobberCPSR = !isThumb1 ||
  570. (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
  571. MachineBasicBlock::LQR_Dead);
  572. bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
  573. // Exception: If the base register is in the input reglist, Thumb1 LDM is
  574. // non-writeback.
  575. // It's also not possible to merge an STR of the base register in Thumb1.
  576. if (isThumb1 && ContainsReg(Regs, Base)) {
  577. assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
  578. if (Opcode == ARM::tLDRi)
  579. Writeback = false;
  580. else if (Opcode == ARM::tSTRi)
  581. return nullptr;
  582. }
  583. ARM_AM::AMSubMode Mode = ARM_AM::ia;
  584. // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  585. bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  586. bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
  587. if (Offset == 4 && haveIBAndDA) {
  588. Mode = ARM_AM::ib;
  589. } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
  590. Mode = ARM_AM::da;
  591. } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
  592. // VLDM/VSTM do not support DB mode without also updating the base reg.
  593. Mode = ARM_AM::db;
  594. } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
  595. // Check if this is a supported opcode before inserting instructions to
  596. // calculate a new base register.
  597. if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
  598. // If starting offset isn't zero, insert a MI to materialize a new base.
  599. // But only do so if it is cost effective, i.e. merging more than two
  600. // loads / stores.
  601. if (NumRegs <= 2)
  602. return nullptr;
  603. // On Thumb1, it's not worth materializing a new base register without
  604. // clobbering the CPSR (i.e. not using ADDS/SUBS).
  605. if (!SafeToClobberCPSR)
  606. return nullptr;
  607. unsigned NewBase;
  608. if (isi32Load(Opcode)) {
  609. // If it is a load, then just use one of the destination registers
  610. // as the new base. Will no longer be writeback in Thumb1.
  611. NewBase = Regs[NumRegs-1].first;
  612. Writeback = false;
  613. } else {
  614. // Find a free register that we can use as scratch register.
  615. moveLiveRegsBefore(MBB, InsertBefore);
  616. // The merged instruction does not exist yet but will use several Regs if
  617. // it is a Store.
  618. if (!isLoadSingle(Opcode))
  619. for (const std::pair<unsigned, bool> &R : Regs)
  620. LiveRegs.addReg(R.first);
  621. NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
  622. if (NewBase == 0)
  623. return nullptr;
  624. }
  625. int BaseOpc =
  626. isThumb2 ? ARM::t2ADDri :
  627. (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
  628. (isThumb1 && Offset < 8) ? ARM::tADDi3 :
  629. isThumb1 ? ARM::tADDi8 : ARM::ADDri;
  630. if (Offset < 0) {
  631. Offset = - Offset;
  632. BaseOpc =
  633. isThumb2 ? ARM::t2SUBri :
  634. (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
  635. isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
  636. }
  637. if (!TL->isLegalAddImmediate(Offset))
  638. // FIXME: Try add with register operand?
  639. return nullptr; // Probably not worth it then.
  640. // We can only append a kill flag to the add/sub input if the value is not
  641. // used in the register list of the stm as well.
  642. bool KillOldBase = BaseKill &&
  643. (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
  644. if (isThumb1) {
  645. // Thumb1: depending on immediate size, use either
  646. // ADDS NewBase, Base, #imm3
  647. // or
  648. // MOV NewBase, Base
  649. // ADDS NewBase, #imm8.
  650. if (Base != NewBase &&
  651. (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
  652. // Need to insert a MOV to the new base first.
  653. if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
  654. !STI->hasV6Ops()) {
  655. // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
  656. if (Pred != ARMCC::AL)
  657. return nullptr;
  658. BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
  659. .addReg(Base, getKillRegState(KillOldBase));
  660. } else
  661. BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
  662. .addReg(Base, getKillRegState(KillOldBase))
  663. .add(predOps(Pred, PredReg));
  664. // The following ADDS/SUBS becomes an update.
  665. Base = NewBase;
  666. KillOldBase = true;
  667. }
  668. if (BaseOpc == ARM::tADDrSPi) {
  669. assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
  670. BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
  671. .addReg(Base, getKillRegState(KillOldBase))
  672. .addImm(Offset / 4)
  673. .add(predOps(Pred, PredReg));
  674. } else
  675. BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
  676. .add(t1CondCodeOp(true))
  677. .addReg(Base, getKillRegState(KillOldBase))
  678. .addImm(Offset)
  679. .add(predOps(Pred, PredReg));
  680. } else {
  681. BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
  682. .addReg(Base, getKillRegState(KillOldBase))
  683. .addImm(Offset)
  684. .add(predOps(Pred, PredReg))
  685. .add(condCodeOp());
  686. }
  687. Base = NewBase;
  688. BaseKill = true; // New base is always killed straight away.
  689. }
  690. bool isDef = isLoadSingle(Opcode);
  691. // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  692. // base register writeback.
  693. Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  694. if (!Opcode)
  695. return nullptr;
  696. // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  697. // - There is no writeback (LDM of base register),
  698. // - the base register is killed by the merged instruction,
  699. // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  700. // to reset the base register.
  701. // Otherwise, don't merge.
  702. // It's safe to return here since the code to materialize a new base register
  703. // above is also conditional on SafeToClobberCPSR.
  704. if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
  705. return nullptr;
  706. MachineInstrBuilder MIB;
  707. if (Writeback) {
  708. assert(isThumb1 && "expected Writeback only inThumb1");
  709. if (Opcode == ARM::tLDMIA) {
  710. assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
  711. // Update tLDMIA with writeback if necessary.
  712. Opcode = ARM::tLDMIA_UPD;
  713. }
  714. MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
  715. // Thumb1: we might need to set base writeback when building the MI.
  716. MIB.addReg(Base, getDefRegState(true))
  717. .addReg(Base, getKillRegState(BaseKill));
  718. // The base isn't dead after a merged instruction with writeback.
  719. // Insert a sub instruction after the newly formed instruction to reset.
  720. if (!BaseKill)
  721. UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
  722. } else {
  723. // No writeback, simply build the MachineInstr.
  724. MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
  725. MIB.addReg(Base, getKillRegState(BaseKill));
  726. }
  727. MIB.addImm(Pred).addReg(PredReg);
  728. for (const std::pair<unsigned, bool> &R : Regs)
  729. MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
  730. return MIB.getInstr();
  731. }
  732. MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
  733. MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
  734. int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
  735. ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
  736. ArrayRef<std::pair<unsigned, bool>> Regs) const {
  737. bool IsLoad = isi32Load(Opcode);
  738. assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
  739. unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
  740. assert(Regs.size() == 2);
  741. MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
  742. TII->get(LoadStoreOpcode));
  743. if (IsLoad) {
  744. MIB.addReg(Regs[0].first, RegState::Define)
  745. .addReg(Regs[1].first, RegState::Define);
  746. } else {
  747. MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
  748. .addReg(Regs[1].first, getKillRegState(Regs[1].second));
  749. }
  750. MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  751. return MIB.getInstr();
  752. }
  753. /// Call MergeOps and update MemOps and merges accordingly on success.
  754. MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  755. const MachineInstr *First = Cand.Instrs.front();
  756. unsigned Opcode = First->getOpcode();
  757. bool IsLoad = isLoadSingle(Opcode);
  758. SmallVector<std::pair<unsigned, bool>, 8> Regs;
  759. SmallVector<unsigned, 4> ImpDefs;
  760. DenseSet<unsigned> KilledRegs;
  761. DenseSet<unsigned> UsedRegs;
  762. // Determine list of registers and list of implicit super-register defs.
  763. for (const MachineInstr *MI : Cand.Instrs) {
  764. const MachineOperand &MO = getLoadStoreRegOp(*MI);
  765. unsigned Reg = MO.getReg();
  766. bool IsKill = MO.isKill();
  767. if (IsKill)
  768. KilledRegs.insert(Reg);
  769. Regs.push_back(std::make_pair(Reg, IsKill));
  770. UsedRegs.insert(Reg);
  771. if (IsLoad) {
  772. // Collect any implicit defs of super-registers, after merging we can't
  773. // be sure anymore that we properly preserved these live ranges and must
  774. // removed these implicit operands.
  775. for (const MachineOperand &MO : MI->implicit_operands()) {
  776. if (!MO.isReg() || !MO.isDef() || MO.isDead())
  777. continue;
  778. assert(MO.isImplicit());
  779. unsigned DefReg = MO.getReg();
  780. if (is_contained(ImpDefs, DefReg))
  781. continue;
  782. // We can ignore cases where the super-reg is read and written.
  783. if (MI->readsRegister(DefReg))
  784. continue;
  785. ImpDefs.push_back(DefReg);
  786. }
  787. }
  788. }
  789. // Attempt the merge.
  790. using iterator = MachineBasicBlock::iterator;
  791. MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  792. iterator InsertBefore = std::next(iterator(LatestMI));
  793. MachineBasicBlock &MBB = *LatestMI->getParent();
  794. unsigned Offset = getMemoryOpOffset(*First);
  795. unsigned Base = getLoadStoreBaseOp(*First).getReg();
  796. bool BaseKill = LatestMI->killsRegister(Base);
  797. unsigned PredReg = 0;
  798. ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
  799. DebugLoc DL = First->getDebugLoc();
  800. MachineInstr *Merged = nullptr;
  801. if (Cand.CanMergeToLSDouble)
  802. Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
  803. Opcode, Pred, PredReg, DL, Regs);
  804. if (!Merged && Cand.CanMergeToLSMulti)
  805. Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
  806. Opcode, Pred, PredReg, DL, Regs);
  807. if (!Merged)
  808. return nullptr;
  809. // Determine earliest instruction that will get removed. We then keep an
  810. // iterator just above it so the following erases don't invalidated it.
  811. iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  812. bool EarliestAtBegin = false;
  813. if (EarliestI == MBB.begin()) {
  814. EarliestAtBegin = true;
  815. } else {
  816. EarliestI = std::prev(EarliestI);
  817. }
  818. // Remove instructions which have been merged.
  819. for (MachineInstr *MI : Cand.Instrs)
  820. MBB.erase(MI);
  821. // Determine range between the earliest removed instruction and the new one.
  822. if (EarliestAtBegin)
  823. EarliestI = MBB.begin();
  824. else
  825. EarliestI = std::next(EarliestI);
  826. auto FixupRange = make_range(EarliestI, iterator(Merged));
  827. if (isLoadSingle(Opcode)) {
  828. // If the previous loads defined a super-reg, then we have to mark earlier
  829. // operands undef; Replicate the super-reg def on the merged instruction.
  830. for (MachineInstr &MI : FixupRange) {
  831. for (unsigned &ImpDefReg : ImpDefs) {
  832. for (MachineOperand &MO : MI.implicit_operands()) {
  833. if (!MO.isReg() || MO.getReg() != ImpDefReg)
  834. continue;
  835. if (MO.readsReg())
  836. MO.setIsUndef();
  837. else if (MO.isDef())
  838. ImpDefReg = 0;
  839. }
  840. }
  841. }
  842. MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
  843. for (unsigned ImpDef : ImpDefs)
  844. MIB.addReg(ImpDef, RegState::ImplicitDefine);
  845. } else {
  846. // Remove kill flags: We are possibly storing the values later now.
  847. assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
  848. for (MachineInstr &MI : FixupRange) {
  849. for (MachineOperand &MO : MI.uses()) {
  850. if (!MO.isReg() || !MO.isKill())
  851. continue;
  852. if (UsedRegs.count(MO.getReg()))
  853. MO.setIsKill(false);
  854. }
  855. }
  856. assert(ImpDefs.empty());
  857. }
  858. return Merged;
  859. }
  /// Return true if \p Offset can be used as the immediate of
  /// t2LDRDi8/t2STRDi8: its magnitude must be a multiple of 4 and below 1024.
  static bool isValidLSDoubleOffset(int Offset) {
    // Take the magnitude in unsigned arithmetic; abs() on the most negative
    // int would overflow, which is undefined behaviour.
    const unsigned Bits = static_cast<unsigned>(Offset);
    const unsigned Value = Offset < 0 ? 0u - Bits : Bits;
    // t2LDRDi8/t2STRDi8 supports an 8 bit immediate which is internally
    // multiplied by 4.
    return (Value % 4) == 0 && Value < 1024;
  }
  866. /// Return true for loads/stores that can be combined to a double/multi
  867. /// operation without increasing the requirements for alignment.
  868. static bool mayCombineMisaligned(const TargetSubtargetInfo &STI,
  869. const MachineInstr &MI) {
  870. // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no
  871. // difference.
  872. unsigned Opcode = MI.getOpcode();
  873. if (!isi32Load(Opcode) && !isi32Store(Opcode))
  874. return true;
  875. // Stack pointer alignment is out of the programmers control so we can trust
  876. // SP-relative loads/stores.
  877. if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
  878. STI.getFrameLowering()->getTransientStackAlignment() >= 4)
  879. return true;
  880. return false;
  881. }
  882. /// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
  ///
  /// Walks \p MemOps from front to back and cuts it into chains of entries
  /// whose offsets are consecutive (each one transfer size after the previous)
  /// and whose registers satisfy the LDM/STM or LDRD/STRD constraints checked
  /// below. Every chain, including a chain of a single instruction, becomes a
  /// MergeCandidate allocated from Allocator and appended to Candidates; a
  /// single-instruction chain has both CanMergeTo* flags cleared.
  ///
  /// \p MemOps must not be empty: its first entry is read unconditionally.
  883. void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  884. const MachineInstr *FirstMI = MemOps[0].MI;
  885. unsigned Opcode = FirstMI->getOpcode();
  886. bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  887. unsigned Size = getLSMultipleTransferSize(FirstMI);
  888. unsigned SIndex = 0;
  889. unsigned EIndex = MemOps.size();
  890. do {
  891. // Look at the first instruction.
  892. const MachineInstr *MI = MemOps[SIndex].MI;
  893. int Offset = MemOps[SIndex].Offset;
  894. const MachineOperand &PMO = getLoadStoreRegOp(*MI);
  895. unsigned PReg = PMO.getReg();
  // An undef register gets the largest possible number, so no following
  // register can compare as "ascending" after it.
  896. unsigned PRegNum = PMO.isUndef() ? std::numeric_limits<unsigned>::max()
  897. : TRI->getEncodingValue(PReg);
  898. unsigned Latest = SIndex;
  899. unsigned Earliest = SIndex;
  900. unsigned Count = 1;
  901. bool CanMergeToLSDouble =
  902. STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
  903. // ARM errata 602117: LDRD with base in list may result in incorrect base
  904. // register when interrupted or faulted.
  905. if (STI->isCortexM3() && isi32Load(Opcode) &&
  906. PReg == getLoadStoreBaseOp(*MI).getReg())
  907. CanMergeToLSDouble = false;
  908. bool CanMergeToLSMulti = true;
  909. // On swift vldm/vstm starting with an odd register number as that needs
  910. // more uops than single vldrs.
  911. if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
  912. CanMergeToLSMulti = false;
  913. // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
  914. // deprecated; LDM to PC is fine but cannot happen here.
  915. if (PReg == ARM::SP || PReg == ARM::PC)
  916. CanMergeToLSMulti = CanMergeToLSDouble = false;
  917. // Should we be conservative?
  918. if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI))
  919. CanMergeToLSMulti = CanMergeToLSDouble = false;
  920. // Merge following instructions where possible.
  // Count is bumped by the loop increment, so it only grows for entries that
  // were accepted (every rejection leaves the loop through a break).
  921. for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
  922. int NewOffset = MemOps[I].Offset;
  923. if (NewOffset != Offset + (int)Size)
  924. break;
  925. const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
  926. unsigned Reg = MO.getReg();
  927. if (Reg == ARM::SP || Reg == ARM::PC)
  928. break;
  929. // See if the current load/store may be part of a multi load/store.
  930. unsigned RegNum = MO.isUndef() ? std::numeric_limits<unsigned>::max()
  931. : TRI->getEncodingValue(Reg);
  932. bool PartOfLSMulti = CanMergeToLSMulti;
  933. if (PartOfLSMulti) {
  934. // Register numbers must be in ascending order.
  935. if (RegNum <= PRegNum)
  936. PartOfLSMulti = false;
  937. // For VFP / NEON load/store multiples, the registers must be
  938. // consecutive and within the limit on the number of registers per
  939. // instruction.
  940. else if (!isNotVFP && RegNum != PRegNum+1)
  941. PartOfLSMulti = false;
  942. }
  943. // See if the current load/store may be part of a double load/store.
  // Only the second entry of a chain (Count still 1) can complete a pair.
  944. bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
  945. if (!PartOfLSMulti && !PartOfLSDouble)
  946. break;
  947. CanMergeToLSMulti &= PartOfLSMulti;
  948. CanMergeToLSDouble &= PartOfLSDouble;
  949. // Track MemOp with latest and earliest position (Positions are
  950. // counted in reverse).
  951. unsigned Position = MemOps[I].Position;
  952. if (Position < MemOps[Latest].Position)
  953. Latest = I;
  954. else if (Position > MemOps[Earliest].Position)
  955. Earliest = I;
  956. // Prepare for next MemOp.
  957. Offset += Size;
  958. PRegNum = RegNum;
  959. }
  960. // Form a candidate from the Ops collected so far.
  961. MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
  962. for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
  963. Candidate->Instrs.push_back(MemOps[C].MI);
  // The indices stored in the candidate are relative to its own Instrs list.
  964. Candidate->LatestMIIdx = Latest - SIndex;
  965. Candidate->EarliestMIIdx = Earliest - SIndex;
  966. Candidate->InsertPos = MemOps[Latest].Position;
  967. if (Count == 1)
  968. CanMergeToLSMulti = CanMergeToLSDouble = false;
  969. Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
  970. Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
  971. Candidates.push_back(Candidate);
  972. // Continue after the chain.
  973. SIndex += Count;
  974. } while (SIndex < EIndex);
  975. }
  976. static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
  977. ARM_AM::AMSubMode Mode) {
  978. switch (Opc) {
  979. default: llvm_unreachable("Unhandled opcode!");
  980. case ARM::LDMIA:
  981. case ARM::LDMDA:
  982. case ARM::LDMDB:
  983. case ARM::LDMIB:
  984. switch (Mode) {
  985. default: llvm_unreachable("Unhandled submode!");
  986. case ARM_AM::ia: return ARM::LDMIA_UPD;
  987. case ARM_AM::ib: return ARM::LDMIB_UPD;
  988. case ARM_AM::da: return ARM::LDMDA_UPD;
  989. case ARM_AM::db: return ARM::LDMDB_UPD;
  990. }
  991. case ARM::STMIA:
  992. case ARM::STMDA:
  993. case ARM::STMDB:
  994. case ARM::STMIB:
  995. switch (Mode) {
  996. default: llvm_unreachable("Unhandled submode!");
  997. case ARM_AM::ia: return ARM::STMIA_UPD;
  998. case ARM_AM::ib: return ARM::STMIB_UPD;
  999. case ARM_AM::da: return ARM::STMDA_UPD;
  1000. case ARM_AM::db: return ARM::STMDB_UPD;
  1001. }
  1002. case ARM::t2LDMIA:
  1003. case ARM::t2LDMDB:
  1004. switch (Mode) {
  1005. default: llvm_unreachable("Unhandled submode!");
  1006. case ARM_AM::ia: return ARM::t2LDMIA_UPD;
  1007. case ARM_AM::db: return ARM::t2LDMDB_UPD;
  1008. }
  1009. case ARM::t2STMIA:
  1010. case ARM::t2STMDB:
  1011. switch (Mode) {
  1012. default: llvm_unreachable("Unhandled submode!");
  1013. case ARM_AM::ia: return ARM::t2STMIA_UPD;
  1014. case ARM_AM::db: return ARM::t2STMDB_UPD;
  1015. }
  1016. case ARM::VLDMSIA:
  1017. switch (Mode) {
  1018. default: llvm_unreachable("Unhandled submode!");
  1019. case ARM_AM::ia: return ARM::VLDMSIA_UPD;
  1020. case ARM_AM::db: return ARM::VLDMSDB_UPD;
  1021. }
  1022. case ARM::VLDMDIA:
  1023. switch (Mode) {
  1024. default: llvm_unreachable("Unhandled submode!");
  1025. case ARM_AM::ia: return ARM::VLDMDIA_UPD;
  1026. case ARM_AM::db: return ARM::VLDMDDB_UPD;
  1027. }
  1028. case ARM::VSTMSIA:
  1029. switch (Mode) {
  1030. default: llvm_unreachable("Unhandled submode!");
  1031. case ARM_AM::ia: return ARM::VSTMSIA_UPD;
  1032. case ARM_AM::db: return ARM::VSTMSDB_UPD;
  1033. }
  1034. case ARM::VSTMDIA:
  1035. switch (Mode) {
  1036. default: llvm_unreachable("Unhandled submode!");
  1037. case ARM_AM::ia: return ARM::VSTMDIA_UPD;
  1038. case ARM_AM::db: return ARM::VSTMDDB_UPD;
  1039. }
  1040. }
  1041. }
  1042. /// Check if the given instruction increments or decrements a register and
  1043. /// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
  1044. /// generated by the instruction are possibly read as well.
  1045. static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
  1046. ARMCC::CondCodes Pred, unsigned PredReg) {
  1047. bool CheckCPSRDef;
  1048. int Scale;
  1049. switch (MI.getOpcode()) {
  1050. case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
  1051. case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
  1052. case ARM::t2SUBri:
  1053. case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
  1054. case ARM::t2ADDri:
  1055. case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
  1056. case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
  1057. case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
  1058. default: return 0;
  1059. }
  1060. unsigned MIPredReg;
  1061. if (MI.getOperand(0).getReg() != Reg ||
  1062. MI.getOperand(1).getReg() != Reg ||
  1063. getInstrPredicate(MI, MIPredReg) != Pred ||
  1064. MIPredReg != PredReg)
  1065. return 0;
  1066. if (CheckCPSRDef && definesCPSR(MI))
  1067. return 0;
  1068. return MI.getOperand(2).getImm() * Scale;
  1069. }
  1070. /// Searches for an increment or decrement of \p Reg before \p MBBI.
  1071. static MachineBasicBlock::iterator
  1072. findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
  1073. ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  1074. Offset = 0;
  1075. MachineBasicBlock &MBB = *MBBI->getParent();
  1076. MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  1077. MachineBasicBlock::iterator EndMBBI = MBB.end();
  1078. if (MBBI == BeginMBBI)
  1079. return EndMBBI;
  1080. // Skip debug values.
  1081. MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
  1082. while (PrevMBBI->isDebugInstr() && PrevMBBI != BeginMBBI)
  1083. --PrevMBBI;
  1084. Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
  1085. return Offset == 0 ? EndMBBI : PrevMBBI;
  1086. }
  1087. /// Searches for a increment or decrement of \p Reg after \p MBBI.
  1088. static MachineBasicBlock::iterator
  1089. findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
  1090. ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  1091. Offset = 0;
  1092. MachineBasicBlock &MBB = *MBBI->getParent();
  1093. MachineBasicBlock::iterator EndMBBI = MBB.end();
  1094. MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
  1095. // Skip debug values.
  1096. while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr())
  1097. ++NextMBBI;
  1098. if (NextMBBI == EndMBBI)
  1099. return EndMBBI;
  1100. Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
  1101. return Offset == 0 ? EndMBBI : NextMBBI;
  1102. }
  1103. /// Fold preceding/trailing inc/dec of base register into the
  1104. /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
  1105. ///
  1106. /// stmia rn, <ra, rb, rc>
  1107. /// rn := rn + 4 * 3;
  1108. /// =>
  1109. /// stmia rn!, <ra, rb, rc>
  1110. ///
  1111. /// rn := rn - 4 * 3;
  1112. /// ldmia rn, <ra, rb, rc>
  1113. /// =>
  1114. /// ldmdb rn!, <ra, rb, rc>
  ///
  /// \returns true if \p MI was replaced by an updating form (in which case
  /// \p MI has been erased from its block), false if nothing was changed.
  1115. bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  1116. // Thumb1 is already using updating loads/stores.
  1117. if (isThumb1) return false;
  1118. const MachineOperand &BaseOP = MI->getOperand(0);
  1119. unsigned Base = BaseOP.getReg();
  1120. bool BaseKill = BaseOP.isKill();
  1121. unsigned PredReg = 0;
  1122. ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  1123. unsigned Opcode = MI->getOpcode();
  1124. DebugLoc DL = MI->getDebugLoc();
  1125. // Can't use an updating ld/st if the base register is also a dest
  1126. // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  1127. for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
  1128. if (MI->getOperand(i).getReg() == Base)
  1129. return false;
  1130. int Bytes = getLSMultipleTransferSize(MI);
  1131. MachineBasicBlock &MBB = *MI->getParent();
  1132. MachineBasicBlock::iterator MBBI(MI);
  1133. int Offset;
  // First try an inc/dec in front of MI: a decrement by the transfer size
  // turns ia into db (and ib into da). Otherwise look behind MI.
  1134. MachineBasicBlock::iterator MergeInstr
  1135. = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  1136. ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
  1137. if (Mode == ARM_AM::ia && Offset == -Bytes) {
  1138. Mode = ARM_AM::db;
  1139. } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
  1140. Mode = ARM_AM::da;
  1141. } else {
  1142. MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
  1143. if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
  1144. ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) {
  1145. // We couldn't find an inc/dec to merge. But if the base is dead, we
  1146. // can still change to a writeback form as that will save us 2 bytes
  1147. // of code size. It can create WAW hazards though, so only do it if
  1148. // we're minimizing code size.
  1149. if (!MBB.getParent()->getFunction().optForMinSize() || !BaseKill)
  1150. return false;
  1151. bool HighRegsUsed = false;
  1152. for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
  1153. if (MI->getOperand(i).getReg() >= ARM::R8) {
  1154. HighRegsUsed = true;
  1155. break;
  1156. }
  // MBB.end() marks "no inc/dec instruction to erase" for the code below.
  1157. if (!HighRegsUsed)
  1158. MergeInstr = MBB.end();
  1159. else
  1160. return false;
  1161. }
  1162. }
  1163. if (MergeInstr != MBB.end())
  1164. MBB.erase(MergeInstr);
  1165. unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  1166. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
  1167. .addReg(Base, getDefRegState(true)) // WB base register
  1168. .addReg(Base, getKillRegState(BaseKill))
  1169. .addImm(Pred).addReg(PredReg);
  1170. // Transfer the rest of operands.
  1171. for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
  1172. MIB.add(MI->getOperand(OpNum));
  1173. // Transfer memoperands.
  1174. MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
  1175. MBB.erase(MBBI);
  1176. return true;
  1177. }
  1178. static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
  1179. ARM_AM::AddrOpc Mode) {
  1180. switch (Opc) {
  1181. case ARM::LDRi12:
  1182. return ARM::LDR_PRE_IMM;
  1183. case ARM::STRi12:
  1184. return ARM::STR_PRE_IMM;
  1185. case ARM::VLDRS:
  1186. return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  1187. case ARM::VLDRD:
  1188. return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  1189. case ARM::VSTRS:
  1190. return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  1191. case ARM::VSTRD:
  1192. return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  1193. case ARM::t2LDRi8:
  1194. case ARM::t2LDRi12:
  1195. return ARM::t2LDR_PRE;
  1196. case ARM::t2STRi8:
  1197. case ARM::t2STRi12:
  1198. return ARM::t2STR_PRE;
  1199. default: llvm_unreachable("Unhandled opcode!");
  1200. }
  1201. }
  1202. static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
  1203. ARM_AM::AddrOpc Mode) {
  1204. switch (Opc) {
  1205. case ARM::LDRi12:
  1206. return ARM::LDR_POST_IMM;
  1207. case ARM::STRi12:
  1208. return ARM::STR_POST_IMM;
  1209. case ARM::VLDRS:
  1210. return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  1211. case ARM::VLDRD:
  1212. return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  1213. case ARM::VSTRS:
  1214. return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  1215. case ARM::VSTRD:
  1216. return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  1217. case ARM::t2LDRi8:
  1218. case ARM::t2LDRi12:
  1219. return ARM::t2LDR_POST;
  1220. case ARM::t2STRi8:
  1221. case ARM::t2STRi12:
  1222. return ARM::t2STR_POST;
  1223. default: llvm_unreachable("Unhandled opcode!");
  1224. }
  1225. }
  1226. /// Fold preceding/trailing inc/dec of base register into the
  1227. /// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
  ///
  /// Only accesses with a zero offset are considered. An inc/dec by the
  /// transfer size found in front of \p MI gives a pre-indexed form, one found
  /// behind \p MI gives a post-indexed form. The VFP (AM5) opcodes are only
  /// folded with a preceding decrement or a trailing increment.
  ///
  /// \returns true if \p MI and the inc/dec were replaced by one updating
  /// instruction (both are erased), false if nothing was changed.
  1228. bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  1229. // Thumb1 doesn't have updating LDR/STR.
  1230. // FIXME: Use LDM/STM with single register instead.
  1231. if (isThumb1) return false;
  1232. unsigned Base = getLoadStoreBaseOp(*MI).getReg();
  1233. bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  1234. unsigned Opcode = MI->getOpcode();
  1235. DebugLoc DL = MI->getDebugLoc();
  1236. bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
  1237. Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  1238. bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  // Bail out unless the access currently uses a zero offset.
  1239. if (isi32Load(Opcode) || isi32Store(Opcode))
  1240. if (MI->getOperand(2).getImm() != 0)
  1241. return false;
  1242. if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
  1243. return false;
  1244. // Can't do the merge if the destination register is the same as the would-be
  1245. // writeback register.
  1246. if (MI->getOperand(0).getReg() == Base)
  1247. return false;
  1248. unsigned PredReg = 0;
  1249. ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  1250. int Bytes = getLSMultipleTransferSize(MI);
  1251. MachineBasicBlock &MBB = *MI->getParent();
  1252. MachineBasicBlock::iterator MBBI(MI);
  1253. int Offset;
  1254. MachineBasicBlock::iterator MergeInstr
  1255. = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  1256. unsigned NewOpc;
  1257. if (!isAM5 && Offset == Bytes) {
  1258. NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
  1259. } else if (Offset == -Bytes) {
  1260. NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
  1261. } else {
  1262. MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
  1263. if (Offset == Bytes) {
  1264. NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
  1265. } else if (!isAM5 && Offset == -Bytes) {
  1266. NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
  1267. } else
  1268. return false;
  1269. }
  // Every path reaching this point found a matching inc/dec, so MergeInstr
  // refers to a real instruction here.
  1270. MBB.erase(MergeInstr);
  1271. ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
  1272. bool isLd = isLoadSingle(Opcode);
  1273. if (isAM5) {
  1274. // VLDM[SD]_UPD, VSTM[SD]_UPD
  1275. // (There are no base-updating versions of VLDR/VSTR instructions, but the
  1276. // updating load/store-multiple instructions can be used with only one
  1277. // register.)
  1278. MachineOperand &MO = MI->getOperand(0);
  1279. BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
  1280. .addReg(Base, getDefRegState(true)) // WB base register
  1281. .addReg(Base, getKillRegState(isLd ? BaseKill : false))
  1282. .addImm(Pred).addReg(PredReg)
  1283. .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
  1284. getKillRegState(MO.isKill())));
  1285. } else if (isLd) {
  1286. if (isAM2) {
  1287. // LDR_PRE, LDR_POST
  // NOTE(review): neither opcode-mapping helper visible in this file returns
  // LDRB_PRE_IMM, so that half of the test looks unreachable - confirm.
  1288. if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
  1289. BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
  1290. .addReg(Base, RegState::Define)
  1291. .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  1292. } else {
  1293. int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  1294. BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
  1295. .addReg(Base, RegState::Define)
  1296. .addReg(Base)
  1297. .addReg(0)
  1298. .addImm(Imm)
  1299. .add(predOps(Pred, PredReg));
  1300. }
  1301. } else {
  1302. // t2LDR_PRE, t2LDR_POST
  1303. BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
  1304. .addReg(Base, RegState::Define)
  1305. .addReg(Base)
  1306. .addImm(Offset)
  1307. .add(predOps(Pred, PredReg));
  1308. }
  1309. } else {
  1310. MachineOperand &MO = MI->getOperand(0);
  1311. // FIXME: post-indexed stores use am2offset_imm, which still encodes
  1312. // the vestigial zero-reg offset register. When that's fixed, this clause
  1313. // can be removed entirely.
  1314. if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
  1315. int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  1316. // STR_PRE, STR_POST
  1317. BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
  1318. .addReg(MO.getReg(), getKillRegState(MO.isKill()))
  1319. .addReg(Base)
  1320. .addReg(0)
  1321. .addImm(Imm)
  1322. .add(predOps(Pred, PredReg));
  1323. } else {
  1324. // t2STR_PRE, t2STR_POST
  1325. BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
  1326. .addReg(MO.getReg(), getKillRegState(MO.isKill()))
  1327. .addReg(Base)
  1328. .addImm(Offset)
  1329. .add(predOps(Pred, PredReg));
  1330. }
  1331. }
  1332. MBB.erase(MBBI);
  1333. return true;
  1334. }
  1335. bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
  1336. unsigned Opcode = MI.getOpcode();
  1337. assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
  1338. "Must have t2STRDi8 or t2LDRDi8");
  1339. if (MI.getOperand(3).getImm() != 0)
  1340. return false;
  1341. // Behaviour for writeback is undefined if base register is the same as one
  1342. // of the others.
  1343. const MachineOperand &BaseOp = MI.getOperand(2);
  1344. unsigned Base = BaseOp.getReg();
  1345. const MachineOperand &Reg0Op = MI.getOperand(0);
  1346. const MachineOperand &Reg1Op = MI.getOperand(1);
  1347. if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
  1348. return false;
  1349. unsigned PredReg;
  1350. ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  1351. MachineBasicBlock::iterator MBBI(MI);
  1352. MachineBasicBlock &MBB = *MI.getParent();
  1353. int Offset;
  1354. MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
  1355. PredReg, Offset);
  1356. unsigned NewOpc;
  1357. if (Offset == 8 || Offset == -8) {
  1358. NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
  1359. } else {
  1360. MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
  1361. if (Offset == 8 || Offset == -8) {
  1362. NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
  1363. } else
  1364. return false;
  1365. }
  1366. MBB.erase(MergeInstr);
  1367. DebugLoc DL = MI.getDebugLoc();
  1368. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  1369. if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
  1370. MIB.add(Reg0Op).add(Reg1Op).addReg(BaseOp.getReg(), RegState::Define);
  1371. } else {
  1372. assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
  1373. MIB.addReg(BaseOp.getReg(), RegState::Define).add(Reg0Op).add(Reg1Op);
  1374. }
  1375. MIB.addReg(BaseOp.getReg(), RegState::Kill)
  1376. .addImm(Offset).addImm(Pred).addReg(PredReg);
  1377. assert(TII->get(Opcode).getNumOperands() == 6 &&
  1378. TII->get(NewOpc).getNumOperands() == 7 &&
  1379. "Unexpected number of operands in Opcode specification.");
  1380. // Transfer implicit operands.
  1381. for (const MachineOperand &MO : MI.implicit_operands())
  1382. MIB.add(MO);
  1383. MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
  1384. MBB.erase(MBBI);
  1385. return true;
  1386. }
  1387. /// Returns true if instruction is a memory operation that this pass is capable
  1388. /// of operating on.
  1389. static bool isMemoryOp(const MachineInstr &MI) {
  1390. unsigned Opcode = MI.getOpcode();
  1391. switch (Opcode) {
  1392. case ARM::VLDRS:
  1393. case ARM::VSTRS:
  1394. case ARM::VLDRD:
  1395. case ARM::VSTRD:
  1396. case ARM::LDRi12:
  1397. case ARM::STRi12:
  1398. case ARM::tLDRi:
  1399. case ARM::tSTRi:
  1400. case ARM::tLDRspi:
  1401. case ARM::tSTRspi:
  1402. case ARM::t2LDRi8:
  1403. case ARM::t2LDRi12:
  1404. case ARM::t2STRi8:
  1405. case ARM::t2STRi12:
  1406. break;
  1407. default:
  1408. return false;
  1409. }
  1410. if (!MI.getOperand(1).isReg())
  1411. return false;
  1412. // When no memory operands are present, conservatively assume unaligned,
  1413. // volatile, unfoldable.
  1414. if (!MI.hasOneMemOperand())
  1415. return false;
  1416. const MachineMemOperand &MMO = **MI.memoperands_begin();
  1417. // Don't touch volatile memory accesses - we may be changing their order.
  1418. if (MMO.isVolatile())
  1419. return false;
  1420. // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  1421. // not.
  1422. if (MMO.getAlignment() < 4)
  1423. return false;
  1424. // str <undef> could probably be eliminated entirely, but for now we just want
  1425. // to avoid making a mess of it.
  1426. // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  1427. if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
  1428. return false;
  1429. // Likewise don't mess with references to undefined addresses.
  1430. if (MI.getOperand(1).isUndef())
  1431. return false;
  1432. return true;
  1433. }
  1434. static void InsertLDR_STR(MachineBasicBlock &MBB,
  1435. MachineBasicBlock::iterator &MBBI, int Offset,
  1436. bool isDef, unsigned NewOpc, unsigned Reg,
  1437. bool RegDeadKill, bool RegUndef, unsigned BaseReg,
  1438. bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred,
  1439. unsigned PredReg, const TargetInstrInfo *TII) {
  1440. if (isDef) {
  1441. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
  1442. TII->get(NewOpc))
  1443. .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
  1444. .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
  1445. MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  1446. } else {
  1447. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
  1448. TII->get(NewOpc))
  1449. .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
  1450. .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
  1451. MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  1452. }
  1453. }
  1454. bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
  1455. MachineBasicBlock::iterator &MBBI) {
  1456. MachineInstr *MI = &*MBBI;
  1457. unsigned Opcode = MI->getOpcode();
  1458. // FIXME: Code/comments below check Opcode == t2STRDi8, but this check returns
  1459. // if we see this opcode.
  1460. if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
  1461. return false;
  1462. const MachineOperand &BaseOp = MI->getOperand(2);
  1463. unsigned BaseReg = BaseOp.getReg();
  1464. unsigned EvenReg = MI->getOperand(0).getReg();
  1465. unsigned OddReg = MI->getOperand(1).getReg();
  1466. unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  1467. unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
  1468. // ARM errata 602117: LDRD with base in list may result in incorrect base
  1469. // register when interrupted or faulted.
  1470. bool Errata602117 = EvenReg == BaseReg &&
  1471. (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  1472. // ARM LDRD/STRD needs consecutive registers.
  1473. bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
  1474. (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
  1475. if (!Errata602117 && !NonConsecutiveRegs)
  1476. return false;
  1477. bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  1478. bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  1479. bool EvenDeadKill = isLd ?
  1480. MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  1481. bool EvenUndef = MI->getOperand(0).isUndef();
  1482. bool OddDeadKill = isLd ?
  1483. MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  1484. bool OddUndef = MI->getOperand(1).isUndef();
  1485. bool BaseKill = BaseOp.isKill();
  1486. bool BaseUndef = BaseOp.isUndef();
  1487. assert((isT2 || MI->getOperand(3).getReg() == ARM::NoRegister) &&
  1488. "register offset not handled below");
  1489. int OffImm = getMemoryOpOffset(*MI);
  1490. unsigned PredReg = 0;
  1491. ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  1492. if (OddRegNum > EvenRegNum && OffImm == 0) {
  1493. // Ascending register numbers and no offset. It's safe to change it to a
  1494. // ldm or stm.
  1495. unsigned NewOpc = (isLd)
  1496. ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
  1497. : (isT2 ? ARM::t2STMIA : ARM::STMIA);
  1498. if (isLd) {
  1499. BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
  1500. .addReg(BaseReg, getKillRegState(BaseKill))
  1501. .addImm(Pred).addReg(PredReg)
  1502. .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
  1503. .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
  1504. ++NumLDRD2LDM;
  1505. } else {
  1506. BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
  1507. .addReg(BaseReg, getKillRegState(BaseKill))
  1508. .addImm(Pred).addReg(PredReg)
  1509. .addReg(EvenReg,
  1510. getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
  1511. .addReg(OddReg,
  1512. getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
  1513. ++NumSTRD2STM;
  1514. }
  1515. } else {
  1516. // Split into two instructions.
  1517. unsigned NewOpc = (isLd)
  1518. ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
  1519. : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
  1520. // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
  1521. // so adjust and use t2LDRi12 here for that.
  1522. unsigned NewOpc2 = (isLd)
  1523. ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
  1524. : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
  1525. // If this is a load, make sure the first load does not clobber the base
  1526. // register before the second load reads it.
  1527. if (isLd && TRI->regsOverlap(EvenReg, BaseReg)) {
  1528. assert(!TRI->regsOverlap(OddReg, BaseReg));
  1529. InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
  1530. false, BaseReg, false, BaseUndef, Pred, PredReg, TII);
  1531. InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
  1532. false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII);
  1533. } else {
  1534. if (OddReg == EvenReg && EvenDeadKill) {
  1535. // If the two source operands are the same, the kill marker is
  1536. // probably on the first one. e.g.
  1537. // t2STRDi8 killed %r5, %r5, killed %r9, 0, 14, %reg0
  1538. EvenDeadKill = false;
  1539. OddDeadKill = true;
  1540. }
  1541. // Never kill the base register in the first instruction.
  1542. if (EvenReg == BaseReg)
  1543. EvenDeadKill = false;
  1544. InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
  1545. EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII);
  1546. InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
  1547. OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII);
  1548. }
  1549. if (isLd)
  1550. ++NumLDRD2LDR;
  1551. else
  1552. ++NumSTRD2STR;
  1553. }
  1554. MBBI = MBB.erase(MBBI);
  1555. return true;
  1556. }
  1557. /// An optimization pass to turn multiple LDR / STR ops of the same base and
  1558. /// incrementing offset into LDM / STM ops.
  1559. bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  1560. MemOpQueue MemOps;
  1561. unsigned CurrBase = 0;
  1562. unsigned CurrOpc = ~0u;
  1563. ARMCC::CondCodes CurrPred = ARMCC::AL;
  1564. unsigned Position = 0;
  1565. assert(Candidates.size() == 0);
  1566. assert(MergeBaseCandidates.size() == 0);
  1567. LiveRegsValid = false;
  1568. for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
  1569. I = MBBI) {
  1570. // The instruction in front of the iterator is the one we look at.
  1571. MBBI = std::prev(I);
  1572. if (FixInvalidRegPairOp(MBB, MBBI))
  1573. continue;
  1574. ++Position;
  1575. if (isMemoryOp(*MBBI)) {
  1576. unsigned Opcode = MBBI->getOpcode();
  1577. const MachineOperand &MO = MBBI->getOperand(0);
  1578. unsigned Reg = MO.getReg();
  1579. unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
  1580. unsigned PredReg = 0;
  1581. ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
  1582. int Offset = getMemoryOpOffset(*MBBI);
  1583. if (CurrBase == 0) {
  1584. // Start of a new chain.
  1585. CurrBase = Base;
  1586. CurrOpc = Opcode;
  1587. CurrPred = Pred;
  1588. MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
  1589. continue;
  1590. }
  1591. // Note: No need to match PredReg in the next if.
  1592. if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
  1593. // Watch out for:
  1594. // r4 := ldr [r0, #8]
  1595. // r4 := ldr [r0, #4]
  1596. // or
  1597. // r0 := ldr [r0]
  1598. // If a load overrides the base register or a register loaded by
  1599. // another load in our chain, we cannot take this instruction.
  1600. bool Overlap = false;
  1601. if (isLoadSingle(Opcode)) {
  1602. Overlap = (Base == Reg);
  1603. if (!Overlap) {
  1604. for (const MemOpQueueEntry &E : MemOps) {
  1605. if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
  1606. Overlap = true;
  1607. break;
  1608. }
  1609. }
  1610. }
  1611. }
  1612. if (!Overlap) {
  1613. // Check offset and sort memory operation into the current chain.
  1614. if (Offset > MemOps.back().Offset) {
  1615. MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
  1616. continue;
  1617. } else {
  1618. MemOpQueue::iterator MI, ME;
  1619. for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
  1620. if (Offset < MI->Offset) {
  1621. // Found a place to insert.
  1622. break;
  1623. }
  1624. if (Offset == MI->Offset) {
  1625. // Collision, abort.
  1626. MI = ME;
  1627. break;
  1628. }
  1629. }
  1630. if (MI != MemOps.end()) {
  1631. MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position));
  1632. continue;
  1633. }
  1634. }
  1635. }
  1636. }
  1637. // Don't advance the iterator; The op will start a new chain next.
  1638. MBBI = I;
  1639. --Position;
  1640. // Fallthrough to look into existing chain.
  1641. } else if (MBBI->isDebugInstr()) {
  1642. continue;
  1643. } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
  1644. MBBI->getOpcode() == ARM::t2STRDi8) {
  1645. // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
  1646. // remember them because we may still be able to merge add/sub into them.
  1647. MergeBaseCandidates.push_back(&*MBBI);
  1648. }
  1649. // If we are here then the chain is broken; Extract candidates for a merge.
  1650. if (MemOps.size() > 0) {
  1651. FormCandidates(MemOps);
  1652. // Reset for the next chain.
  1653. CurrBase = 0;
  1654. CurrOpc = ~0u;
  1655. CurrPred = ARMCC::AL;
  1656. MemOps.clear();
  1657. }
  1658. }
  1659. if (MemOps.size() > 0)
  1660. FormCandidates(MemOps);
  1661. // Sort candidates so they get processed from end to begin of the basic
  1662. // block later; This is necessary for liveness calculation.
  1663. auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
  1664. return M0->InsertPos < M1->InsertPos;
  1665. };
  1666. llvm::sort(Candidates.begin(), Candidates.end(), LessThan);
  1667. // Go through list of candidates and merge.
  1668. bool Changed = false;
  1669. for (const MergeCandidate *Candidate : Candidates) {
  1670. if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
  1671. MachineInstr *Merged = MergeOpsUpdate(*Candidate);
  1672. // Merge preceding/trailing base inc/dec into the merged op.
  1673. if (Merged) {
  1674. Changed = true;
  1675. unsigned Opcode = Merged->getOpcode();
  1676. if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
  1677. MergeBaseUpdateLSDouble(*Merged);
  1678. else
  1679. MergeBaseUpdateLSMultiple(Merged);
  1680. } else {
  1681. for (MachineInstr *MI : Candidate->Instrs) {
  1682. if (MergeBaseUpdateLoadStore(MI))
  1683. Changed = true;
  1684. }
  1685. }
  1686. } else {
  1687. assert(Candidate->Instrs.size() == 1);
  1688. if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
  1689. Changed = true;
  1690. }
  1691. }
  1692. Candidates.clear();
  1693. // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
  1694. for (MachineInstr *MI : MergeBaseCandidates)
  1695. MergeBaseUpdateLSDouble(*MI);
  1696. MergeBaseCandidates.clear();
  1697. return Changed;
  1698. }
  1699. /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
  1700. /// into the preceding stack restore so it directly restore the value of LR
  1701. /// into pc.
  1702. /// ldmfd sp!, {..., lr}
  1703. /// bx lr
  1704. /// or
  1705. /// ldmfd sp!, {..., lr}
  1706. /// mov pc, lr
  1707. /// =>
  1708. /// ldmfd sp!, {..., pc}
  1709. bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  1710. // Thumb1 LDM doesn't allow high registers.
  1711. if (isThumb1) return false;
  1712. if (MBB.empty()) return false;
  1713. MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  1714. if (MBBI != MBB.begin() && MBBI != MBB.end() &&
  1715. (MBBI->getOpcode() == ARM::BX_RET ||
  1716. MBBI->getOpcode() == ARM::tBX_RET ||
  1717. MBBI->getOpcode() == ARM::MOVPCLR)) {
  1718. MachineBasicBlock::iterator PrevI = std::prev(MBBI);
  1719. // Ignore any debug instructions.
  1720. while (PrevI->isDebugInstr() && PrevI != MBB.begin())
  1721. --PrevI;
  1722. MachineInstr &PrevMI = *PrevI;
  1723. unsigned Opcode = PrevMI.getOpcode();
  1724. if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
  1725. Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
  1726. Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
  1727. MachineOperand &MO = PrevMI.getOperand(PrevMI.getNumOperands() - 1);
  1728. if (MO.getReg() != ARM::LR)
  1729. return false;
  1730. unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
  1731. assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
  1732. Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
  1733. PrevMI.setDesc(TII->get(NewOpc));
  1734. MO.setReg(ARM::PC);
  1735. PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI);
  1736. MBB.erase(MBBI);
  1737. // We now restore LR into PC so it is not live-out of the return block
  1738. // anymore: Clear the CSI Restored bit.
  1739. MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo();
  1740. // CSI should be fixed after PrologEpilog Insertion
  1741. assert(MFI.isCalleeSavedInfoValid() && "CSI should be valid");
  1742. for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
  1743. if (Info.getReg() == ARM::LR) {
  1744. Info.setRestored(false);
  1745. break;
  1746. }
  1747. }
  1748. return true;
  1749. }
  1750. }
  1751. return false;
  1752. }
  1753. bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
  1754. MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  1755. if (MBBI == MBB.begin() || MBBI == MBB.end() ||
  1756. MBBI->getOpcode() != ARM::tBX_RET)
  1757. return false;
  1758. MachineBasicBlock::iterator Prev = MBBI;
  1759. --Prev;
  1760. if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
  1761. return false;
  1762. for (auto Use : Prev->uses())
  1763. if (Use.isKill()) {
  1764. assert(STI->hasV4TOps());
  1765. BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
  1766. .addReg(Use.getReg(), RegState::Kill)
  1767. .add(predOps(ARMCC::AL))
  1768. .copyImplicitOps(*MBBI);
  1769. MBB.erase(MBBI);
  1770. MBB.erase(Prev);
  1771. return true;
  1772. }
  1773. llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
  1774. }
  1775. bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  1776. if (skipFunction(Fn.getFunction()))
  1777. return false;
  1778. MF = &Fn;
  1779. STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  1780. TL = STI->getTargetLowering();
  1781. AFI = Fn.getInfo<ARMFunctionInfo>();
  1782. TII = STI->getInstrInfo();
  1783. TRI = STI->getRegisterInfo();
  1784. RegClassInfoValid = false;
  1785. isThumb2 = AFI->isThumb2Function();
  1786. isThumb1 = AFI->isThumbFunction() && !isThumb2;
  1787. bool Modified = false;
  1788. for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
  1789. ++MFI) {
  1790. MachineBasicBlock &MBB = *MFI;
  1791. Modified |= LoadStoreMultipleOpti(MBB);
  1792. if (STI->hasV5TOps())
  1793. Modified |= MergeReturnIntoLDM(MBB);
  1794. if (isThumb1)
  1795. Modified |= CombineMovBx(MBB);
  1796. }
  1797. Allocator.DestroyAll();
  1798. return Modified;
  1799. }
  1800. #define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
  1801. "ARM pre- register allocation load / store optimization pass"
  1802. namespace {
  1803. /// Pre- register allocation pass that move load / stores from consecutive
  1804. /// locations close to make it more likely they will be combined later.
  1805. struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
  1806. static char ID;
  1807. AliasAnalysis *AA;
  1808. const DataLayout *TD;
  1809. const TargetInstrInfo *TII;
  1810. const TargetRegisterInfo *TRI;
  1811. const ARMSubtarget *STI;
  1812. MachineRegisterInfo *MRI;
  1813. MachineFunction *MF;
  1814. ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
  1815. bool runOnMachineFunction(MachineFunction &Fn) override;
  1816. StringRef getPassName() const override {
  1817. return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
  1818. }
  1819. void getAnalysisUsage(AnalysisUsage &AU) const override {
  1820. AU.addRequired<AAResultsWrapperPass>();
  1821. MachineFunctionPass::getAnalysisUsage(AU);
  1822. }
  1823. private:
  1824. bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
  1825. unsigned &NewOpc, unsigned &EvenReg,
  1826. unsigned &OddReg, unsigned &BaseReg,
  1827. int &Offset,
  1828. unsigned &PredReg, ARMCC::CondCodes &Pred,
  1829. bool &isT2);
  1830. bool RescheduleOps(MachineBasicBlock *MBB,
  1831. SmallVectorImpl<MachineInstr *> &Ops,
  1832. unsigned Base, bool isLd,
  1833. DenseMap<MachineInstr*, unsigned> &MI2LocMap);
  1834. bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  1835. };
  1836. } // end anonymous namespace
  1837. char ARMPreAllocLoadStoreOpt::ID = 0;
  1838. INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
  1839. ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
  1840. bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  1841. if (AssumeMisalignedLoadStores || skipFunction(Fn.getFunction()))
  1842. return false;
  1843. TD = &Fn.getDataLayout();
  1844. STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  1845. TII = STI->getInstrInfo();
  1846. TRI = STI->getRegisterInfo();
  1847. MRI = &Fn.getRegInfo();
  1848. MF = &Fn;
  1849. AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  1850. bool Modified = false;
  1851. for (MachineBasicBlock &MFI : Fn)
  1852. Modified |= RescheduleLoadStoreInstrs(&MFI);
  1853. return Modified;
  1854. }
  1855. static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
  1856. MachineBasicBlock::iterator I,
  1857. MachineBasicBlock::iterator E,
  1858. SmallPtrSetImpl<MachineInstr*> &MemOps,
  1859. SmallSet<unsigned, 4> &MemRegs,
  1860. const TargetRegisterInfo *TRI,
  1861. AliasAnalysis *AA) {
  1862. // Are there stores / loads / calls between them?
  1863. SmallSet<unsigned, 4> AddedRegPressure;
  1864. while (++I != E) {
  1865. if (I->isDebugInstr() || MemOps.count(&*I))
  1866. continue;
  1867. if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
  1868. return false;
  1869. if (I->mayStore() || (!isLd && I->mayLoad()))
  1870. for (MachineInstr *MemOp : MemOps)
  1871. if (I->mayAlias(AA, *MemOp, /*UseTBAA*/ false))
  1872. return false;
  1873. for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
  1874. MachineOperand &MO = I->getOperand(j);
  1875. if (!MO.isReg())
  1876. continue;
  1877. unsigned Reg = MO.getReg();
  1878. if (MO.isDef() && TRI->regsOverlap(Reg, Base))
  1879. return false;
  1880. if (Reg != Base && !MemRegs.count(Reg))
  1881. AddedRegPressure.insert(Reg);
  1882. }
  1883. }
  1884. // Estimate register pressure increase due to the transformation.
  1885. if (MemRegs.size() <= 4)
  1886. // Ok if we are moving small number of instructions.
  1887. return true;
  1888. return AddedRegPressure.size() <= MemRegs.size() * 2;
  1889. }
  1890. bool
  1891. ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
  1892. DebugLoc &dl, unsigned &NewOpc,
  1893. unsigned &FirstReg,
  1894. unsigned &SecondReg,
  1895. unsigned &BaseReg, int &Offset,
  1896. unsigned &PredReg,
  1897. ARMCC::CondCodes &Pred,
  1898. bool &isT2) {
  1899. // Make sure we're allowed to generate LDRD/STRD.
  1900. if (!STI->hasV5TEOps())
  1901. return false;
  1902. // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  1903. unsigned Scale = 1;
  1904. unsigned Opcode = Op0->getOpcode();
  1905. if (Opcode == ARM::LDRi12) {
  1906. NewOpc = ARM::LDRD;
  1907. } else if (Opcode == ARM::STRi12) {
  1908. NewOpc = ARM::STRD;
  1909. } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
  1910. NewOpc = ARM::t2LDRDi8;
  1911. Scale = 4;
  1912. isT2 = true;
  1913. } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
  1914. NewOpc = ARM::t2STRDi8;
  1915. Scale = 4;
  1916. isT2 = true;
  1917. } else {
  1918. return false;
  1919. }
  1920. // Make sure the base address satisfies i64 ld / st alignment requirement.
  1921. // At the moment, we ignore the memoryoperand's value.
  1922. // If we want to use AliasAnalysis, we should check it accordingly.
  1923. if (!Op0->hasOneMemOperand() ||
  1924. (*Op0->memoperands_begin())->isVolatile())
  1925. return false;
  1926. unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  1927. const Function &Func = MF->getFunction();
  1928. unsigned ReqAlign = STI->hasV6Ops()
  1929. ? TD->getABITypeAlignment(Type::getInt64Ty(Func.getContext()))
  1930. : 8; // Pre-v6 need 8-byte align
  1931. if (Align < ReqAlign)
  1932. return false;
  1933. // Then make sure the immediate offset fits.
  1934. int OffImm = getMemoryOpOffset(*Op0);
  1935. if (isT2) {
  1936. int Limit = (1 << 8) * Scale;
  1937. if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
  1938. return false;
  1939. Offset = OffImm;
  1940. } else {
  1941. ARM_AM::AddrOpc AddSub = ARM_AM::add;
  1942. if (OffImm < 0) {
  1943. AddSub = ARM_AM::sub;
  1944. OffImm = - OffImm;
  1945. }
  1946. int Limit = (1 << 8) * Scale;
  1947. if (OffImm >= Limit || (OffImm & (Scale-1)))
  1948. return false;
  1949. Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  1950. }
  1951. FirstReg = Op0->getOperand(0).getReg();
  1952. SecondReg = Op1->getOperand(0).getReg();
  1953. if (FirstReg == SecondReg)
  1954. return false;
  1955. BaseReg = Op0->getOperand(1).getReg();
  1956. Pred = getInstrPredicate(*Op0, PredReg);
  1957. dl = Op0->getDebugLoc();
  1958. return true;
  1959. }
  1960. bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
  1961. SmallVectorImpl<MachineInstr *> &Ops,
  1962. unsigned Base, bool isLd,
  1963. DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  1964. bool RetVal = false;
  1965. // Sort by offset (in reverse order).
  1966. llvm::sort(Ops.begin(), Ops.end(),
  1967. [](const MachineInstr *LHS, const MachineInstr *RHS) {
  1968. int LOffset = getMemoryOpOffset(*LHS);
  1969. int ROffset = getMemoryOpOffset(*RHS);
  1970. assert(LHS == RHS || LOffset != ROffset);
  1971. return LOffset > ROffset;
  1972. });
  1973. // The loads / stores of the same base are in order. Scan them from first to
  1974. // last and check for the following:
  1975. // 1. Any def of base.
  1976. // 2. Any gaps.
  1977. while (Ops.size() > 1) {
  1978. unsigned FirstLoc = ~0U;
  1979. unsigned LastLoc = 0;
  1980. MachineInstr *FirstOp = nullptr;
  1981. MachineInstr *LastOp = nullptr;
  1982. int LastOffset = 0;
  1983. unsigned LastOpcode = 0;
  1984. unsigned LastBytes = 0;
  1985. unsigned NumMove = 0;
  1986. for (int i = Ops.size() - 1; i >= 0; --i) {
  1987. // Make sure each operation has the same kind.
  1988. MachineInstr *Op = Ops[i];
  1989. unsigned LSMOpcode
  1990. = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
  1991. if (LastOpcode && LSMOpcode != LastOpcode)
  1992. break;
  1993. // Check that we have a continuous set of offsets.
  1994. int Offset = getMemoryOpOffset(*Op);
  1995. unsigned Bytes = getLSMultipleTransferSize(Op);
  1996. if (LastBytes) {
  1997. if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
  1998. break;
  1999. }
  2000. // Don't try to reschedule too many instructions.
  2001. if (NumMove == 8) // FIXME: Tune this limit.
  2002. break;
  2003. // Found a mergable instruction; save information about it.
  2004. ++NumMove;
  2005. LastOffset = Offset;
  2006. LastBytes = Bytes;
  2007. LastOpcode = LSMOpcode;
  2008. unsigned Loc = MI2LocMap[Op];
  2009. if (Loc <= FirstLoc) {
  2010. FirstLoc = Loc;
  2011. FirstOp = Op;
  2012. }
  2013. if (Loc >= LastLoc) {
  2014. LastLoc = Loc;
  2015. LastOp = Op;
  2016. }
  2017. }
  2018. if (NumMove <= 1)
  2019. Ops.pop_back();
  2020. else {
  2021. SmallPtrSet<MachineInstr*, 4> MemOps;
  2022. SmallSet<unsigned, 4> MemRegs;
  2023. for (size_t i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i) {
  2024. MemOps.insert(Ops[i]);
  2025. MemRegs.insert(Ops[i]->getOperand(0).getReg());
  2026. }
  2027. // Be conservative, if the instructions are too far apart, don't
  2028. // move them. We want to limit the increase of register pressure.
  2029. bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
  2030. if (DoMove)
  2031. DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
  2032. MemOps, MemRegs, TRI, AA);
  2033. if (!DoMove) {
  2034. for (unsigned i = 0; i != NumMove; ++i)
  2035. Ops.pop_back();
  2036. } else {
  2037. // This is the new location for the loads / stores.
  2038. MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
  2039. while (InsertPos != MBB->end() &&
  2040. (MemOps.count(&*InsertPos) || InsertPos->isDebugInstr()))
  2041. ++InsertPos;
  2042. // If we are moving a pair of loads / stores, see if it makes sense
  2043. // to try to allocate a pair of registers that can form register pairs.
  2044. MachineInstr *Op0 = Ops.back();
  2045. MachineInstr *Op1 = Ops[Ops.size()-2];
  2046. unsigned FirstReg = 0, SecondReg = 0;
  2047. unsigned BaseReg = 0, PredReg = 0;
  2048. ARMCC::CondCodes Pred = ARMCC::AL;
  2049. bool isT2 = false;
  2050. unsigned NewOpc = 0;
  2051. int Offset = 0;
  2052. DebugLoc dl;
  2053. if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
  2054. FirstReg, SecondReg, BaseReg,
  2055. Offset, PredReg, Pred, isT2)) {
  2056. Ops.pop_back();
  2057. Ops.pop_back();
  2058. const MCInstrDesc &MCID = TII->get(NewOpc);
  2059. const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
  2060. MRI->constrainRegClass(FirstReg, TRC);
  2061. MRI->constrainRegClass(SecondReg, TRC);
  2062. // Form the pair instruction.
  2063. if (isLd) {
  2064. MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
  2065. .addReg(FirstReg, RegState::Define)
  2066. .addReg(SecondReg, RegState::Define)
  2067. .addReg(BaseReg);
  2068. // FIXME: We're converting from LDRi12 to an insn that still
  2069. // uses addrmode2, so we need an explicit offset reg. It should
  2070. // always by reg0 since we're transforming LDRi12s.
  2071. if (!isT2)
  2072. MIB.addReg(0);
  2073. MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  2074. MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
  2075. DEBUG(dbgs() << "Formed " << *MIB << "\n");
  2076. ++NumLDRDFormed;
  2077. } else {
  2078. MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
  2079. .addReg(FirstReg)
  2080. .addReg(SecondReg)
  2081. .addReg(BaseReg);
  2082. // FIXME: We're converting from STRi12 to an insn that still
  2083. // uses addrmode2, so we need an explicit offset reg. It should
  2084. // always be reg0 since we're transforming STRi12s.
  2085. if (!isT2)
  2086. MIB.addReg(0);
  2087. MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  2088. MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
  2089. DEBUG(dbgs() << "Formed " << *MIB << "\n");
  2090. ++NumSTRDFormed;
  2091. }
  2092. MBB->erase(Op0);
  2093. MBB->erase(Op1);
  2094. if (!isT2) {
  2095. // Add register allocation hints to form register pairs.
  2096. MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
  2097. MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
  2098. }
  2099. } else {
  2100. for (unsigned i = 0; i != NumMove; ++i) {
  2101. MachineInstr *Op = Ops.back();
  2102. Ops.pop_back();
  2103. MBB->splice(InsertPos, MBB, Op);
  2104. }
  2105. }
  2106. NumLdStMoved += NumMove;
  2107. RetVal = true;
  2108. }
  2109. }
  2110. }
  2111. return RetVal;
  2112. }
  2113. bool
  2114. ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  2115. bool RetVal = false;
  2116. DenseMap<MachineInstr*, unsigned> MI2LocMap;
  2117. DenseMap<unsigned, SmallVector<MachineInstr *, 4>> Base2LdsMap;
  2118. DenseMap<unsigned, SmallVector<MachineInstr *, 4>> Base2StsMap;
  2119. SmallVector<unsigned, 4> LdBases;
  2120. SmallVector<unsigned, 4> StBases;
  2121. unsigned Loc = 0;
  2122. MachineBasicBlock::iterator MBBI = MBB->begin();
  2123. MachineBasicBlock::iterator E = MBB->end();
  2124. while (MBBI != E) {
  2125. for (; MBBI != E; ++MBBI) {
  2126. MachineInstr &MI = *MBBI;
  2127. if (MI.isCall() || MI.isTerminator()) {
  2128. // Stop at barriers.
  2129. ++MBBI;
  2130. break;
  2131. }
  2132. if (!MI.isDebugInstr())
  2133. MI2LocMap[&MI] = ++Loc;
  2134. if (!isMemoryOp(MI))
  2135. continue;
  2136. unsigned PredReg = 0;
  2137. if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
  2138. continue;
  2139. int Opc = MI.getOpcode();
  2140. bool isLd = isLoadSingle(Opc);
  2141. unsigned Base = MI.getOperand(1).getReg();
  2142. int Offset = getMemoryOpOffset(MI);
  2143. bool StopHere = false;
  2144. if (isLd) {
  2145. DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator BI =
  2146. Base2LdsMap.find(Base);
  2147. if (BI != Base2LdsMap.end()) {
  2148. for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
  2149. if (Offset == getMemoryOpOffset(*BI->second[i])) {
  2150. StopHere = true;
  2151. break;
  2152. }
  2153. }
  2154. if (!StopHere)
  2155. BI->second.push_back(&MI);
  2156. } else {
  2157. Base2LdsMap[Base].push_back(&MI);
  2158. LdBases.push_back(Base);
  2159. }
  2160. } else {
  2161. DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator BI =
  2162. Base2StsMap.find(Base);
  2163. if (BI != Base2StsMap.end()) {
  2164. for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
  2165. if (Offset == getMemoryOpOffset(*BI->second[i])) {
  2166. StopHere = true;
  2167. break;
  2168. }
  2169. }
  2170. if (!StopHere)
  2171. BI->second.push_back(&MI);
  2172. } else {
  2173. Base2StsMap[Base].push_back(&MI);
  2174. StBases.push_back(Base);
  2175. }
  2176. }
  2177. if (StopHere) {
  2178. // Found a duplicate (a base+offset combination that's seen earlier).
  2179. // Backtrack.
  2180. --Loc;
  2181. break;
  2182. }
  2183. }
  2184. // Re-schedule loads.
  2185. for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
  2186. unsigned Base = LdBases[i];
  2187. SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
  2188. if (Lds.size() > 1)
  2189. RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
  2190. }
  2191. // Re-schedule stores.
  2192. for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
  2193. unsigned Base = StBases[i];
  2194. SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
  2195. if (Sts.size() > 1)
  2196. RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
  2197. }
  2198. if (MBBI != E) {
  2199. Base2LdsMap.clear();
  2200. Base2StsMap.clear();
  2201. LdBases.clear();
  2202. StBases.clear();
  2203. }
  2204. }
  2205. return RetVal;
  2206. }
  2207. /// Returns an instance of the load / store optimization pass.
  2208. FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  2209. if (PreAlloc)
  2210. return new ARMPreAllocLoadStoreOpt();
  2211. return new ARMLoadStoreOpt();
  2212. }