ARMLoadStoreOptimizer.cpp 86 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483
  1. //===- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass -------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. /// \file This file contains a pass that performs load / store related peephole
  10. /// optimizations. This pass should be run after register allocation.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "ARM.h"
  14. #include "ARMBaseInstrInfo.h"
  15. #include "ARMBaseRegisterInfo.h"
  16. #include "ARMISelLowering.h"
  17. #include "ARMMachineFunctionInfo.h"
  18. #include "ARMSubtarget.h"
  19. #include "MCTargetDesc/ARMAddressingModes.h"
  20. #include "MCTargetDesc/ARMBaseInfo.h"
  21. #include "Utils/ARMBaseInfo.h"
  22. #include "llvm/ADT/ArrayRef.h"
  23. #include "llvm/ADT/DenseMap.h"
  24. #include "llvm/ADT/DenseSet.h"
  25. #include "llvm/ADT/STLExtras.h"
  26. #include "llvm/ADT/SmallPtrSet.h"
  27. #include "llvm/ADT/SmallSet.h"
  28. #include "llvm/ADT/SmallVector.h"
  29. #include "llvm/ADT/Statistic.h"
  30. #include "llvm/ADT/iterator_range.h"
  31. #include "llvm/Analysis/AliasAnalysis.h"
  32. #include "llvm/CodeGen/LivePhysRegs.h"
  33. #include "llvm/CodeGen/MachineBasicBlock.h"
  34. #include "llvm/CodeGen/MachineFunction.h"
  35. #include "llvm/CodeGen/MachineFunctionPass.h"
  36. #include "llvm/CodeGen/MachineInstr.h"
  37. #include "llvm/CodeGen/MachineInstrBuilder.h"
  38. #include "llvm/CodeGen/MachineMemOperand.h"
  39. #include "llvm/CodeGen/MachineOperand.h"
  40. #include "llvm/CodeGen/MachineRegisterInfo.h"
  41. #include "llvm/CodeGen/RegisterClassInfo.h"
  42. #include "llvm/CodeGen/TargetFrameLowering.h"
  43. #include "llvm/CodeGen/TargetInstrInfo.h"
  44. #include "llvm/CodeGen/TargetLowering.h"
  45. #include "llvm/CodeGen/TargetRegisterInfo.h"
  46. #include "llvm/CodeGen/TargetSubtargetInfo.h"
  47. #include "llvm/IR/DataLayout.h"
  48. #include "llvm/IR/DebugLoc.h"
  49. #include "llvm/IR/DerivedTypes.h"
  50. #include "llvm/IR/Function.h"
  51. #include "llvm/IR/Type.h"
  52. #include "llvm/MC/MCInstrDesc.h"
  53. #include "llvm/Pass.h"
  54. #include "llvm/Support/Allocator.h"
  55. #include "llvm/Support/CommandLine.h"
  56. #include "llvm/Support/Debug.h"
  57. #include "llvm/Support/ErrorHandling.h"
  58. #include "llvm/Support/raw_ostream.h"
  59. #include <algorithm>
  60. #include <cassert>
  61. #include <cstddef>
  62. #include <cstdlib>
  63. #include <iterator>
  64. #include <limits>
  65. #include <utility>
  66. using namespace llvm;
  67. #define DEBUG_TYPE "arm-ldst-opt"
  68. STATISTIC(NumLDMGened , "Number of ldm instructions generated");
  69. STATISTIC(NumSTMGened , "Number of stm instructions generated");
  70. STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
  71. STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
  72. STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
  73. STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
  74. STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
  75. STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
  76. STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
  77. STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
  78. STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");
  79. /// This switch disables formation of double/multi instructions that could
  80. /// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP
  81. /// disabled. This can be used to create libraries that are robust even when
  82. /// users provoke undefined behaviour by supplying misaligned pointers.
  83. /// \see mayCombineMisaligned()
  84. static cl::opt<bool>
  85. AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden,
  86. cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));
  87. #define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"
  88. namespace {
  89. /// Post- register allocation pass the combine load / store instructions to
  90. /// form ldm / stm instructions.
  91. struct ARMLoadStoreOpt : public MachineFunctionPass {
  92. static char ID;
  93. const MachineFunction *MF;
  94. const TargetInstrInfo *TII;
  95. const TargetRegisterInfo *TRI;
  96. const ARMSubtarget *STI;
  97. const TargetLowering *TL;
  98. ARMFunctionInfo *AFI;
  99. LivePhysRegs LiveRegs;
  100. RegisterClassInfo RegClassInfo;
  101. MachineBasicBlock::const_iterator LiveRegPos;
  102. bool LiveRegsValid;
  103. bool RegClassInfoValid;
  104. bool isThumb1, isThumb2;
  105. ARMLoadStoreOpt() : MachineFunctionPass(ID) {}
  106. bool runOnMachineFunction(MachineFunction &Fn) override;
  107. MachineFunctionProperties getRequiredProperties() const override {
  108. return MachineFunctionProperties().set(
  109. MachineFunctionProperties::Property::NoVRegs);
  110. }
  111. StringRef getPassName() const override { return ARM_LOAD_STORE_OPT_NAME; }
  112. private:
  113. /// A set of load/store MachineInstrs with same base register sorted by
  114. /// offset.
  115. struct MemOpQueueEntry {
  116. MachineInstr *MI;
  117. int Offset; ///< Load/Store offset.
  118. unsigned Position; ///< Position as counted from end of basic block.
  119. MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position)
  120. : MI(&MI), Offset(Offset), Position(Position) {}
  121. };
  122. using MemOpQueue = SmallVector<MemOpQueueEntry, 8>;
  123. /// A set of MachineInstrs that fulfill (nearly all) conditions to get
  124. /// merged into a LDM/STM.
  125. struct MergeCandidate {
  126. /// List of instructions ordered by load/store offset.
  127. SmallVector<MachineInstr*, 4> Instrs;
  128. /// Index in Instrs of the instruction being latest in the schedule.
  129. unsigned LatestMIIdx;
  130. /// Index in Instrs of the instruction being earliest in the schedule.
  131. unsigned EarliestMIIdx;
  132. /// Index into the basic block where the merged instruction will be
  133. /// inserted. (See MemOpQueueEntry.Position)
  134. unsigned InsertPos;
  135. /// Whether the instructions can be merged into a ldm/stm instruction.
  136. bool CanMergeToLSMulti;
  137. /// Whether the instructions can be merged into a ldrd/strd instruction.
  138. bool CanMergeToLSDouble;
  139. };
  140. SpecificBumpPtrAllocator<MergeCandidate> Allocator;
  141. SmallVector<const MergeCandidate*,4> Candidates;
  142. SmallVector<MachineInstr*,4> MergeBaseCandidates;
  143. void moveLiveRegsBefore(const MachineBasicBlock &MBB,
  144. MachineBasicBlock::const_iterator Before);
  145. unsigned findFreeReg(const TargetRegisterClass &RegClass);
  146. void UpdateBaseRegUses(MachineBasicBlock &MBB,
  147. MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
  148. unsigned Base, unsigned WordOffset,
  149. ARMCC::CondCodes Pred, unsigned PredReg);
  150. MachineInstr *CreateLoadStoreMulti(
  151. MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
  152. int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
  153. ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
  154. ArrayRef<std::pair<unsigned, bool>> Regs,
  155. ArrayRef<MachineInstr*> Instrs);
  156. MachineInstr *CreateLoadStoreDouble(
  157. MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
  158. int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
  159. ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
  160. ArrayRef<std::pair<unsigned, bool>> Regs,
  161. ArrayRef<MachineInstr*> Instrs) const;
  162. void FormCandidates(const MemOpQueue &MemOps);
  163. MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
  164. bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
  165. MachineBasicBlock::iterator &MBBI);
  166. bool MergeBaseUpdateLoadStore(MachineInstr *MI);
  167. bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
  168. bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
  169. bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
  170. bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  171. bool CombineMovBx(MachineBasicBlock &MBB);
  172. };
  173. } // end anonymous namespace
  174. char ARMLoadStoreOpt::ID = 0;
  175. INITIALIZE_PASS(ARMLoadStoreOpt, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME, false,
  176. false)
  177. static bool definesCPSR(const MachineInstr &MI) {
  178. for (const auto &MO : MI.operands()) {
  179. if (!MO.isReg())
  180. continue;
  181. if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
  182. // If the instruction has live CPSR def, then it's not safe to fold it
  183. // into load / store.
  184. return true;
  185. }
  186. return false;
  187. }
  188. static int getMemoryOpOffset(const MachineInstr &MI) {
  189. unsigned Opcode = MI.getOpcode();
  190. bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  191. unsigned NumOperands = MI.getDesc().getNumOperands();
  192. unsigned OffField = MI.getOperand(NumOperands - 3).getImm();
  193. if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
  194. Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
  195. Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
  196. Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
  197. return OffField;
  198. // Thumb1 immediate offsets are scaled by 4
  199. if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
  200. Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
  201. return OffField * 4;
  202. int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
  203. : ARM_AM::getAM5Offset(OffField) * 4;
  204. ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
  205. : ARM_AM::getAM5Op(OffField);
  206. if (Op == ARM_AM::sub)
  207. return -Offset;
  208. return Offset;
  209. }
  210. static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  211. return MI.getOperand(1);
  212. }
  213. static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  214. return MI.getOperand(0);
  215. }
  216. static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  217. switch (Opcode) {
  218. default: llvm_unreachable("Unhandled opcode!");
  219. case ARM::LDRi12:
  220. ++NumLDMGened;
  221. switch (Mode) {
  222. default: llvm_unreachable("Unhandled submode!");
  223. case ARM_AM::ia: return ARM::LDMIA;
  224. case ARM_AM::da: return ARM::LDMDA;
  225. case ARM_AM::db: return ARM::LDMDB;
  226. case ARM_AM::ib: return ARM::LDMIB;
  227. }
  228. case ARM::STRi12:
  229. ++NumSTMGened;
  230. switch (Mode) {
  231. default: llvm_unreachable("Unhandled submode!");
  232. case ARM_AM::ia: return ARM::STMIA;
  233. case ARM_AM::da: return ARM::STMDA;
  234. case ARM_AM::db: return ARM::STMDB;
  235. case ARM_AM::ib: return ARM::STMIB;
  236. }
  237. case ARM::tLDRi:
  238. case ARM::tLDRspi:
  239. // tLDMIA is writeback-only - unless the base register is in the input
  240. // reglist.
  241. ++NumLDMGened;
  242. switch (Mode) {
  243. default: llvm_unreachable("Unhandled submode!");
  244. case ARM_AM::ia: return ARM::tLDMIA;
  245. }
  246. case ARM::tSTRi:
  247. case ARM::tSTRspi:
  248. // There is no non-writeback tSTMIA either.
  249. ++NumSTMGened;
  250. switch (Mode) {
  251. default: llvm_unreachable("Unhandled submode!");
  252. case ARM_AM::ia: return ARM::tSTMIA_UPD;
  253. }
  254. case ARM::t2LDRi8:
  255. case ARM::t2LDRi12:
  256. ++NumLDMGened;
  257. switch (Mode) {
  258. default: llvm_unreachable("Unhandled submode!");
  259. case ARM_AM::ia: return ARM::t2LDMIA;
  260. case ARM_AM::db: return ARM::t2LDMDB;
  261. }
  262. case ARM::t2STRi8:
  263. case ARM::t2STRi12:
  264. ++NumSTMGened;
  265. switch (Mode) {
  266. default: llvm_unreachable("Unhandled submode!");
  267. case ARM_AM::ia: return ARM::t2STMIA;
  268. case ARM_AM::db: return ARM::t2STMDB;
  269. }
  270. case ARM::VLDRS:
  271. ++NumVLDMGened;
  272. switch (Mode) {
  273. default: llvm_unreachable("Unhandled submode!");
  274. case ARM_AM::ia: return ARM::VLDMSIA;
  275. case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
  276. }
  277. case ARM::VSTRS:
  278. ++NumVSTMGened;
  279. switch (Mode) {
  280. default: llvm_unreachable("Unhandled submode!");
  281. case ARM_AM::ia: return ARM::VSTMSIA;
  282. case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
  283. }
  284. case ARM::VLDRD:
  285. ++NumVLDMGened;
  286. switch (Mode) {
  287. default: llvm_unreachable("Unhandled submode!");
  288. case ARM_AM::ia: return ARM::VLDMDIA;
  289. case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
  290. }
  291. case ARM::VSTRD:
  292. ++NumVSTMGened;
  293. switch (Mode) {
  294. default: llvm_unreachable("Unhandled submode!");
  295. case ARM_AM::ia: return ARM::VSTMDIA;
  296. case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
  297. }
  298. }
  299. }
  300. static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  301. switch (Opcode) {
  302. default: llvm_unreachable("Unhandled opcode!");
  303. case ARM::LDMIA_RET:
  304. case ARM::LDMIA:
  305. case ARM::LDMIA_UPD:
  306. case ARM::STMIA:
  307. case ARM::STMIA_UPD:
  308. case ARM::tLDMIA:
  309. case ARM::tLDMIA_UPD:
  310. case ARM::tSTMIA_UPD:
  311. case ARM::t2LDMIA_RET:
  312. case ARM::t2LDMIA:
  313. case ARM::t2LDMIA_UPD:
  314. case ARM::t2STMIA:
  315. case ARM::t2STMIA_UPD:
  316. case ARM::VLDMSIA:
  317. case ARM::VLDMSIA_UPD:
  318. case ARM::VSTMSIA:
  319. case ARM::VSTMSIA_UPD:
  320. case ARM::VLDMDIA:
  321. case ARM::VLDMDIA_UPD:
  322. case ARM::VSTMDIA:
  323. case ARM::VSTMDIA_UPD:
  324. return ARM_AM::ia;
  325. case ARM::LDMDA:
  326. case ARM::LDMDA_UPD:
  327. case ARM::STMDA:
  328. case ARM::STMDA_UPD:
  329. return ARM_AM::da;
  330. case ARM::LDMDB:
  331. case ARM::LDMDB_UPD:
  332. case ARM::STMDB:
  333. case ARM::STMDB_UPD:
  334. case ARM::t2LDMDB:
  335. case ARM::t2LDMDB_UPD:
  336. case ARM::t2STMDB:
  337. case ARM::t2STMDB_UPD:
  338. case ARM::VLDMSDB_UPD:
  339. case ARM::VSTMSDB_UPD:
  340. case ARM::VLDMDDB_UPD:
  341. case ARM::VSTMDDB_UPD:
  342. return ARM_AM::db;
  343. case ARM::LDMIB:
  344. case ARM::LDMIB_UPD:
  345. case ARM::STMIB:
  346. case ARM::STMIB_UPD:
  347. return ARM_AM::ib;
  348. }
  349. }
  350. static bool isT1i32Load(unsigned Opc) {
  351. return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
  352. }
  353. static bool isT2i32Load(unsigned Opc) {
  354. return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
  355. }
  356. static bool isi32Load(unsigned Opc) {
  357. return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc) ;
  358. }
  359. static bool isT1i32Store(unsigned Opc) {
  360. return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
  361. }
  362. static bool isT2i32Store(unsigned Opc) {
  363. return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
  364. }
  365. static bool isi32Store(unsigned Opc) {
  366. return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
  367. }
  368. static bool isLoadSingle(unsigned Opc) {
  369. return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
  370. }
  371. static unsigned getImmScale(unsigned Opc) {
  372. switch (Opc) {
  373. default: llvm_unreachable("Unhandled opcode!");
  374. case ARM::tLDRi:
  375. case ARM::tSTRi:
  376. case ARM::tLDRspi:
  377. case ARM::tSTRspi:
  378. return 1;
  379. case ARM::tLDRHi:
  380. case ARM::tSTRHi:
  381. return 2;
  382. case ARM::tLDRBi:
  383. case ARM::tSTRBi:
  384. return 4;
  385. }
  386. }
  387. static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  388. switch (MI->getOpcode()) {
  389. default: return 0;
  390. case ARM::LDRi12:
  391. case ARM::STRi12:
  392. case ARM::tLDRi:
  393. case ARM::tSTRi:
  394. case ARM::tLDRspi:
  395. case ARM::tSTRspi:
  396. case ARM::t2LDRi8:
  397. case ARM::t2LDRi12:
  398. case ARM::t2STRi8:
  399. case ARM::t2STRi12:
  400. case ARM::VLDRS:
  401. case ARM::VSTRS:
  402. return 4;
  403. case ARM::VLDRD:
  404. case ARM::VSTRD:
  405. return 8;
  406. case ARM::LDMIA:
  407. case ARM::LDMDA:
  408. case ARM::LDMDB:
  409. case ARM::LDMIB:
  410. case ARM::STMIA:
  411. case ARM::STMDA:
  412. case ARM::STMDB:
  413. case ARM::STMIB:
  414. case ARM::tLDMIA:
  415. case ARM::tLDMIA_UPD:
  416. case ARM::tSTMIA_UPD:
  417. case ARM::t2LDMIA:
  418. case ARM::t2LDMDB:
  419. case ARM::t2STMIA:
  420. case ARM::t2STMDB:
  421. case ARM::VLDMSIA:
  422. case ARM::VSTMSIA:
  423. return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  424. case ARM::VLDMDIA:
  425. case ARM::VSTMDIA:
  426. return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  427. }
  428. }
  429. /// Update future uses of the base register with the offset introduced
  430. /// due to writeback. This function only works on Thumb1.
  431. void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
  432. MachineBasicBlock::iterator MBBI,
  433. const DebugLoc &DL, unsigned Base,
  434. unsigned WordOffset,
  435. ARMCC::CondCodes Pred,
  436. unsigned PredReg) {
  437. assert(isThumb1 && "Can only update base register uses for Thumb1!");
  438. // Start updating any instructions with immediate offsets. Insert a SUB before
  439. // the first non-updateable instruction (if any).
  440. for (; MBBI != MBB.end(); ++MBBI) {
  441. bool InsertSub = false;
  442. unsigned Opc = MBBI->getOpcode();
  443. if (MBBI->readsRegister(Base)) {
  444. int Offset;
  445. bool IsLoad =
  446. Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
  447. bool IsStore =
  448. Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;
  449. if (IsLoad || IsStore) {
  450. // Loads and stores with immediate offsets can be updated, but only if
  451. // the new offset isn't negative.
  452. // The MachineOperand containing the offset immediate is the last one
  453. // before predicates.
  454. MachineOperand &MO =
  455. MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
  456. // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
  457. Offset = MO.getImm() - WordOffset * getImmScale(Opc);
  458. // If storing the base register, it needs to be reset first.
  459. unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();
  460. if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
  461. MO.setImm(Offset);
  462. else
  463. InsertSub = true;
  464. } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
  465. !definesCPSR(*MBBI)) {
  466. // SUBS/ADDS using this register, with a dead def of the CPSR.
  467. // Merge it with the update; if the merged offset is too large,
  468. // insert a new sub instead.
  469. MachineOperand &MO =
  470. MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
  471. Offset = (Opc == ARM::tSUBi8) ?
  472. MO.getImm() + WordOffset * 4 :
  473. MO.getImm() - WordOffset * 4 ;
  474. if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
  475. // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
  476. // Offset == 0.
  477. MO.setImm(Offset);
  478. // The base register has now been reset, so exit early.
  479. return;
  480. } else {
  481. InsertSub = true;
  482. }
  483. } else {
  484. // Can't update the instruction.
  485. InsertSub = true;
  486. }
  487. } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) {
  488. // Since SUBS sets the condition flags, we can't place the base reset
  489. // after an instruction that has a live CPSR def.
  490. // The base register might also contain an argument for a function call.
  491. InsertSub = true;
  492. }
  493. if (InsertSub) {
  494. // An instruction above couldn't be updated, so insert a sub.
  495. BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
  496. .add(t1CondCodeOp(true))
  497. .addReg(Base)
  498. .addImm(WordOffset * 4)
  499. .addImm(Pred)
  500. .addReg(PredReg);
  501. return;
  502. }
  503. if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
  504. // Register got killed. Stop updating.
  505. return;
  506. }
  507. // End of block was reached.
  508. if (MBB.succ_size() > 0) {
  509. // FIXME: Because of a bug, live registers are sometimes missing from
  510. // the successor blocks' live-in sets. This means we can't trust that
  511. // information and *always* have to reset at the end of a block.
  512. // See PR21029.
  513. if (MBBI != MBB.end()) --MBBI;
  514. BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base)
  515. .add(t1CondCodeOp(true))
  516. .addReg(Base)
  517. .addImm(WordOffset * 4)
  518. .addImm(Pred)
  519. .addReg(PredReg);
  520. }
  521. }
  522. /// Return the first register of class \p RegClass that is not in \p Regs.
  523. unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
  524. if (!RegClassInfoValid) {
  525. RegClassInfo.runOnMachineFunction(*MF);
  526. RegClassInfoValid = true;
  527. }
  528. for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
  529. if (!LiveRegs.contains(Reg))
  530. return Reg;
  531. return 0;
  532. }
  533. /// Compute live registers just before instruction \p Before (in normal schedule
  534. /// direction). Computes backwards so multiple queries in the same block must
  535. /// come in reverse order.
  536. void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
  537. MachineBasicBlock::const_iterator Before) {
  538. // Initialize if we never queried in this block.
  539. if (!LiveRegsValid) {
  540. LiveRegs.init(*TRI);
  541. LiveRegs.addLiveOuts(MBB);
  542. LiveRegPos = MBB.end();
  543. LiveRegsValid = true;
  544. }
  545. // Move backward just before the "Before" position.
  546. while (LiveRegPos != Before) {
  547. --LiveRegPos;
  548. LiveRegs.stepBackward(*LiveRegPos);
  549. }
  550. }
  551. static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
  552. unsigned Reg) {
  553. for (const std::pair<unsigned, bool> &R : Regs)
  554. if (R.first == Reg)
  555. return true;
  556. return false;
  557. }
  /// Create and insert a LDM or STM with Base as base register and registers in
  /// Regs as the register operands that would be loaded / stored. It returns
  /// nullptr if the transformation is not done.
  561. MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
  562. MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
  563. int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
  564. ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
  565. ArrayRef<std::pair<unsigned, bool>> Regs,
  566. ArrayRef<MachineInstr*> Instrs) {
  567. unsigned NumRegs = Regs.size();
  568. assert(NumRegs > 1);
  569. // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  570. // Compute liveness information for that register to make the decision.
  571. bool SafeToClobberCPSR = !isThumb1 ||
  572. (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
  573. MachineBasicBlock::LQR_Dead);
  574. bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.
  575. // Exception: If the base register is in the input reglist, Thumb1 LDM is
  576. // non-writeback.
  577. // It's also not possible to merge an STR of the base register in Thumb1.
  578. if (isThumb1 && ContainsReg(Regs, Base)) {
  579. assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
  580. if (Opcode == ARM::tLDRi)
  581. Writeback = false;
  582. else if (Opcode == ARM::tSTRi)
  583. return nullptr;
  584. }
  585. ARM_AM::AMSubMode Mode = ARM_AM::ia;
  586. // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  587. bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  588. bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
  589. if (Offset == 4 && haveIBAndDA) {
  590. Mode = ARM_AM::ib;
  591. } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
  592. Mode = ARM_AM::da;
  593. } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
  594. // VLDM/VSTM do not support DB mode without also updating the base reg.
  595. Mode = ARM_AM::db;
  596. } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
  597. // Check if this is a supported opcode before inserting instructions to
  598. // calculate a new base register.
  599. if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;
  600. // If starting offset isn't zero, insert a MI to materialize a new base.
  601. // But only do so if it is cost effective, i.e. merging more than two
  602. // loads / stores.
  603. if (NumRegs <= 2)
  604. return nullptr;
  605. // On Thumb1, it's not worth materializing a new base register without
  606. // clobbering the CPSR (i.e. not using ADDS/SUBS).
  607. if (!SafeToClobberCPSR)
  608. return nullptr;
  609. unsigned NewBase;
  610. if (isi32Load(Opcode)) {
  611. // If it is a load, then just use one of the destination registers
  612. // as the new base. Will no longer be writeback in Thumb1.
  613. NewBase = Regs[NumRegs-1].first;
  614. Writeback = false;
  615. } else {
  616. // Find a free register that we can use as scratch register.
  617. moveLiveRegsBefore(MBB, InsertBefore);
  618. // The merged instruction does not exist yet but will use several Regs if
  619. // it is a Store.
  620. if (!isLoadSingle(Opcode))
  621. for (const std::pair<unsigned, bool> &R : Regs)
  622. LiveRegs.addReg(R.first);
  623. NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
  624. if (NewBase == 0)
  625. return nullptr;
  626. }
  627. int BaseOpc =
  628. isThumb2 ? ARM::t2ADDri :
  629. (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
  630. (isThumb1 && Offset < 8) ? ARM::tADDi3 :
  631. isThumb1 ? ARM::tADDi8 : ARM::ADDri;
  632. if (Offset < 0) {
  633. Offset = - Offset;
  634. BaseOpc =
  635. isThumb2 ? ARM::t2SUBri :
  636. (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
  637. isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
  638. }
  639. if (!TL->isLegalAddImmediate(Offset))
  640. // FIXME: Try add with register operand?
  641. return nullptr; // Probably not worth it then.
  642. // We can only append a kill flag to the add/sub input if the value is not
  643. // used in the register list of the stm as well.
  644. bool KillOldBase = BaseKill &&
  645. (!isi32Store(Opcode) || !ContainsReg(Regs, Base));
  646. if (isThumb1) {
  647. // Thumb1: depending on immediate size, use either
  648. // ADDS NewBase, Base, #imm3
  649. // or
  650. // MOV NewBase, Base
  651. // ADDS NewBase, #imm8.
  652. if (Base != NewBase &&
  653. (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
  654. // Need to insert a MOV to the new base first.
  655. if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
  656. !STI->hasV6Ops()) {
  657. // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
  658. if (Pred != ARMCC::AL)
  659. return nullptr;
  660. BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
  661. .addReg(Base, getKillRegState(KillOldBase));
  662. } else
  663. BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
  664. .addReg(Base, getKillRegState(KillOldBase))
  665. .add(predOps(Pred, PredReg));
  666. // The following ADDS/SUBS becomes an update.
  667. Base = NewBase;
  668. KillOldBase = true;
  669. }
  670. if (BaseOpc == ARM::tADDrSPi) {
  671. assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
  672. BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
  673. .addReg(Base, getKillRegState(KillOldBase))
  674. .addImm(Offset / 4)
  675. .add(predOps(Pred, PredReg));
  676. } else
  677. BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
  678. .add(t1CondCodeOp(true))
  679. .addReg(Base, getKillRegState(KillOldBase))
  680. .addImm(Offset)
  681. .add(predOps(Pred, PredReg));
  682. } else {
  683. BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
  684. .addReg(Base, getKillRegState(KillOldBase))
  685. .addImm(Offset)
  686. .add(predOps(Pred, PredReg))
  687. .add(condCodeOp());
  688. }
  689. Base = NewBase;
  690. BaseKill = true; // New base is always killed straight away.
  691. }
  692. bool isDef = isLoadSingle(Opcode);
  693. // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  694. // base register writeback.
  695. Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  696. if (!Opcode)
  697. return nullptr;
  698. // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  699. // - There is no writeback (LDM of base register),
  700. // - the base register is killed by the merged instruction,
  701. // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  702. // to reset the base register.
  703. // Otherwise, don't merge.
  704. // It's safe to return here since the code to materialize a new base register
  705. // above is also conditional on SafeToClobberCPSR.
  706. if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
  707. return nullptr;
  708. MachineInstrBuilder MIB;
  709. if (Writeback) {
  710. assert(isThumb1 && "expected Writeback only inThumb1");
  711. if (Opcode == ARM::tLDMIA) {
  712. assert(!(ContainsReg(Regs, Base)) && "Thumb1 can't LDM ! with Base in Regs");
  713. // Update tLDMIA with writeback if necessary.
  714. Opcode = ARM::tLDMIA_UPD;
  715. }
  716. MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
  717. // Thumb1: we might need to set base writeback when building the MI.
  718. MIB.addReg(Base, getDefRegState(true))
  719. .addReg(Base, getKillRegState(BaseKill));
  720. // The base isn't dead after a merged instruction with writeback.
  721. // Insert a sub instruction after the newly formed instruction to reset.
  722. if (!BaseKill)
  723. UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
  724. } else {
  725. // No writeback, simply build the MachineInstr.
  726. MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
  727. MIB.addReg(Base, getKillRegState(BaseKill));
  728. }
  729. MIB.addImm(Pred).addReg(PredReg);
  730. for (const std::pair<unsigned, bool> &R : Regs)
  731. MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));
  732. MIB.cloneMergedMemRefs(Instrs);
  733. return MIB.getInstr();
  734. }
  735. MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
  736. MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
  737. int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
  738. ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
  739. ArrayRef<std::pair<unsigned, bool>> Regs,
  740. ArrayRef<MachineInstr*> Instrs) const {
  741. bool IsLoad = isi32Load(Opcode);
  742. assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
  743. unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;
  744. assert(Regs.size() == 2);
  745. MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
  746. TII->get(LoadStoreOpcode));
  747. if (IsLoad) {
  748. MIB.addReg(Regs[0].first, RegState::Define)
  749. .addReg(Regs[1].first, RegState::Define);
  750. } else {
  751. MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
  752. .addReg(Regs[1].first, getKillRegState(Regs[1].second));
  753. }
  754. MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  755. MIB.cloneMergedMemRefs(Instrs);
  756. return MIB.getInstr();
  757. }
  758. /// Call MergeOps and update MemOps and merges accordingly on success.
  759. MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  760. const MachineInstr *First = Cand.Instrs.front();
  761. unsigned Opcode = First->getOpcode();
  762. bool IsLoad = isLoadSingle(Opcode);
  763. SmallVector<std::pair<unsigned, bool>, 8> Regs;
  764. SmallVector<unsigned, 4> ImpDefs;
  765. DenseSet<unsigned> KilledRegs;
  766. DenseSet<unsigned> UsedRegs;
  767. // Determine list of registers and list of implicit super-register defs.
  768. for (const MachineInstr *MI : Cand.Instrs) {
  769. const MachineOperand &MO = getLoadStoreRegOp(*MI);
  770. unsigned Reg = MO.getReg();
  771. bool IsKill = MO.isKill();
  772. if (IsKill)
  773. KilledRegs.insert(Reg);
  774. Regs.push_back(std::make_pair(Reg, IsKill));
  775. UsedRegs.insert(Reg);
  776. if (IsLoad) {
  777. // Collect any implicit defs of super-registers, after merging we can't
  778. // be sure anymore that we properly preserved these live ranges and must
  779. // removed these implicit operands.
  780. for (const MachineOperand &MO : MI->implicit_operands()) {
  781. if (!MO.isReg() || !MO.isDef() || MO.isDead())
  782. continue;
  783. assert(MO.isImplicit());
  784. unsigned DefReg = MO.getReg();
  785. if (is_contained(ImpDefs, DefReg))
  786. continue;
  787. // We can ignore cases where the super-reg is read and written.
  788. if (MI->readsRegister(DefReg))
  789. continue;
  790. ImpDefs.push_back(DefReg);
  791. }
  792. }
  793. }
  794. // Attempt the merge.
  795. using iterator = MachineBasicBlock::iterator;
  796. MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  797. iterator InsertBefore = std::next(iterator(LatestMI));
  798. MachineBasicBlock &MBB = *LatestMI->getParent();
  799. unsigned Offset = getMemoryOpOffset(*First);
  800. unsigned Base = getLoadStoreBaseOp(*First).getReg();
  801. bool BaseKill = LatestMI->killsRegister(Base);
  802. unsigned PredReg = 0;
  803. ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
  804. DebugLoc DL = First->getDebugLoc();
  805. MachineInstr *Merged = nullptr;
  806. if (Cand.CanMergeToLSDouble)
  807. Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
  808. Opcode, Pred, PredReg, DL, Regs,
  809. Cand.Instrs);
  810. if (!Merged && Cand.CanMergeToLSMulti)
  811. Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
  812. Opcode, Pred, PredReg, DL, Regs, Cand.Instrs);
  813. if (!Merged)
  814. return nullptr;
  815. // Determine earliest instruction that will get removed. We then keep an
  816. // iterator just above it so the following erases don't invalidated it.
  817. iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  818. bool EarliestAtBegin = false;
  819. if (EarliestI == MBB.begin()) {
  820. EarliestAtBegin = true;
  821. } else {
  822. EarliestI = std::prev(EarliestI);
  823. }
  824. // Remove instructions which have been merged.
  825. for (MachineInstr *MI : Cand.Instrs)
  826. MBB.erase(MI);
  827. // Determine range between the earliest removed instruction and the new one.
  828. if (EarliestAtBegin)
  829. EarliestI = MBB.begin();
  830. else
  831. EarliestI = std::next(EarliestI);
  832. auto FixupRange = make_range(EarliestI, iterator(Merged));
  833. if (isLoadSingle(Opcode)) {
  834. // If the previous loads defined a super-reg, then we have to mark earlier
  835. // operands undef; Replicate the super-reg def on the merged instruction.
  836. for (MachineInstr &MI : FixupRange) {
  837. for (unsigned &ImpDefReg : ImpDefs) {
  838. for (MachineOperand &MO : MI.implicit_operands()) {
  839. if (!MO.isReg() || MO.getReg() != ImpDefReg)
  840. continue;
  841. if (MO.readsReg())
  842. MO.setIsUndef();
  843. else if (MO.isDef())
  844. ImpDefReg = 0;
  845. }
  846. }
  847. }
  848. MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
  849. for (unsigned ImpDef : ImpDefs)
  850. MIB.addReg(ImpDef, RegState::ImplicitDefine);
  851. } else {
  852. // Remove kill flags: We are possibly storing the values later now.
  853. assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
  854. for (MachineInstr &MI : FixupRange) {
  855. for (MachineOperand &MO : MI.uses()) {
  856. if (!MO.isReg() || !MO.isKill())
  857. continue;
  858. if (UsedRegs.count(MO.getReg()))
  859. MO.setIsKill(false);
  860. }
  861. }
  862. assert(ImpDefs.empty());
  863. }
  864. return Merged;
  865. }
  /// Return true if \p Offset fits the immediate field of t2LDRDi8 / t2STRDi8:
  /// a multiple of 4 whose magnitude is below 1024 (an 8-bit immediate that is
  /// internally multiplied by 4).
  static bool isValidLSDoubleOffset(int Offset) {
    // Negate in a wider type: abs(INT_MIN) on a plain int is undefined.
    const long long Wide = Offset;
    const long long Magnitude = Wide < 0 ? -Wide : Wide;
    return (Magnitude % 4) == 0 && Magnitude < 1024;
  }
  872. /// Return true for loads/stores that can be combined to a double/multi
  873. /// operation without increasing the requirements for alignment.
  874. static bool mayCombineMisaligned(const TargetSubtargetInfo &STI,
  875. const MachineInstr &MI) {
  876. // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no
  877. // difference.
  878. unsigned Opcode = MI.getOpcode();
  879. if (!isi32Load(Opcode) && !isi32Store(Opcode))
  880. return true;
  881. // Stack pointer alignment is out of the programmers control so we can trust
  882. // SP-relative loads/stores.
  883. if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
  884. STI.getFrameLowering()->getTransientStackAlignment() >= 4)
  885. return true;
  886. return false;
  887. }
  888. /// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
  889. void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  890. const MachineInstr *FirstMI = MemOps[0].MI;
  891. unsigned Opcode = FirstMI->getOpcode();
  892. bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  893. unsigned Size = getLSMultipleTransferSize(FirstMI);
  894. unsigned SIndex = 0;
  895. unsigned EIndex = MemOps.size();
  896. do {
  897. // Look at the first instruction.
  898. const MachineInstr *MI = MemOps[SIndex].MI;
  899. int Offset = MemOps[SIndex].Offset;
  900. const MachineOperand &PMO = getLoadStoreRegOp(*MI);
  901. unsigned PReg = PMO.getReg();
  902. unsigned PRegNum = PMO.isUndef() ? std::numeric_limits<unsigned>::max()
  903. : TRI->getEncodingValue(PReg);
  904. unsigned Latest = SIndex;
  905. unsigned Earliest = SIndex;
  906. unsigned Count = 1;
  907. bool CanMergeToLSDouble =
  908. STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
  909. // ARM errata 602117: LDRD with base in list may result in incorrect base
  910. // register when interrupted or faulted.
  911. if (STI->isCortexM3() && isi32Load(Opcode) &&
  912. PReg == getLoadStoreBaseOp(*MI).getReg())
  913. CanMergeToLSDouble = false;
  914. bool CanMergeToLSMulti = true;
  915. // On swift vldm/vstm starting with an odd register number as that needs
  916. // more uops than single vldrs.
  917. if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
  918. CanMergeToLSMulti = false;
  919. // LDRD/STRD do not allow SP/PC. LDM/STM do not support it or have it
  920. // deprecated; LDM to PC is fine but cannot happen here.
  921. if (PReg == ARM::SP || PReg == ARM::PC)
  922. CanMergeToLSMulti = CanMergeToLSDouble = false;
  923. // Should we be conservative?
  924. if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI))
  925. CanMergeToLSMulti = CanMergeToLSDouble = false;
  926. // vldm / vstm limit are 32 for S variants, 16 for D variants.
  927. unsigned Limit;
  928. switch (Opcode) {
  929. default:
  930. Limit = UINT_MAX;
  931. break;
  932. case ARM::VLDRD:
  933. case ARM::VSTRD:
  934. Limit = 16;
  935. break;
  936. }
  937. // Merge following instructions where possible.
  938. for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
  939. int NewOffset = MemOps[I].Offset;
  940. if (NewOffset != Offset + (int)Size)
  941. break;
  942. const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
  943. unsigned Reg = MO.getReg();
  944. if (Reg == ARM::SP || Reg == ARM::PC)
  945. break;
  946. if (Count == Limit)
  947. break;
  948. // See if the current load/store may be part of a multi load/store.
  949. unsigned RegNum = MO.isUndef() ? std::numeric_limits<unsigned>::max()
  950. : TRI->getEncodingValue(Reg);
  951. bool PartOfLSMulti = CanMergeToLSMulti;
  952. if (PartOfLSMulti) {
  953. // Register numbers must be in ascending order.
  954. if (RegNum <= PRegNum)
  955. PartOfLSMulti = false;
  956. // For VFP / NEON load/store multiples, the registers must be
  957. // consecutive and within the limit on the number of registers per
  958. // instruction.
  959. else if (!isNotVFP && RegNum != PRegNum+1)
  960. PartOfLSMulti = false;
  961. }
  962. // See if the current load/store may be part of a double load/store.
  963. bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;
  964. if (!PartOfLSMulti && !PartOfLSDouble)
  965. break;
  966. CanMergeToLSMulti &= PartOfLSMulti;
  967. CanMergeToLSDouble &= PartOfLSDouble;
  968. // Track MemOp with latest and earliest position (Positions are
  969. // counted in reverse).
  970. unsigned Position = MemOps[I].Position;
  971. if (Position < MemOps[Latest].Position)
  972. Latest = I;
  973. else if (Position > MemOps[Earliest].Position)
  974. Earliest = I;
  975. // Prepare for next MemOp.
  976. Offset += Size;
  977. PRegNum = RegNum;
  978. }
  979. // Form a candidate from the Ops collected so far.
  980. MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
  981. for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
  982. Candidate->Instrs.push_back(MemOps[C].MI);
  983. Candidate->LatestMIIdx = Latest - SIndex;
  984. Candidate->EarliestMIIdx = Earliest - SIndex;
  985. Candidate->InsertPos = MemOps[Latest].Position;
  986. if (Count == 1)
  987. CanMergeToLSMulti = CanMergeToLSDouble = false;
  988. Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
  989. Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
  990. Candidates.push_back(Candidate);
  991. // Continue after the chain.
  992. SIndex += Count;
  993. } while (SIndex < EIndex);
  994. }
  995. static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
  996. ARM_AM::AMSubMode Mode) {
  997. switch (Opc) {
  998. default: llvm_unreachable("Unhandled opcode!");
  999. case ARM::LDMIA:
  1000. case ARM::LDMDA:
  1001. case ARM::LDMDB:
  1002. case ARM::LDMIB:
  1003. switch (Mode) {
  1004. default: llvm_unreachable("Unhandled submode!");
  1005. case ARM_AM::ia: return ARM::LDMIA_UPD;
  1006. case ARM_AM::ib: return ARM::LDMIB_UPD;
  1007. case ARM_AM::da: return ARM::LDMDA_UPD;
  1008. case ARM_AM::db: return ARM::LDMDB_UPD;
  1009. }
  1010. case ARM::STMIA:
  1011. case ARM::STMDA:
  1012. case ARM::STMDB:
  1013. case ARM::STMIB:
  1014. switch (Mode) {
  1015. default: llvm_unreachable("Unhandled submode!");
  1016. case ARM_AM::ia: return ARM::STMIA_UPD;
  1017. case ARM_AM::ib: return ARM::STMIB_UPD;
  1018. case ARM_AM::da: return ARM::STMDA_UPD;
  1019. case ARM_AM::db: return ARM::STMDB_UPD;
  1020. }
  1021. case ARM::t2LDMIA:
  1022. case ARM::t2LDMDB:
  1023. switch (Mode) {
  1024. default: llvm_unreachable("Unhandled submode!");
  1025. case ARM_AM::ia: return ARM::t2LDMIA_UPD;
  1026. case ARM_AM::db: return ARM::t2LDMDB_UPD;
  1027. }
  1028. case ARM::t2STMIA:
  1029. case ARM::t2STMDB:
  1030. switch (Mode) {
  1031. default: llvm_unreachable("Unhandled submode!");
  1032. case ARM_AM::ia: return ARM::t2STMIA_UPD;
  1033. case ARM_AM::db: return ARM::t2STMDB_UPD;
  1034. }
  1035. case ARM::VLDMSIA:
  1036. switch (Mode) {
  1037. default: llvm_unreachable("Unhandled submode!");
  1038. case ARM_AM::ia: return ARM::VLDMSIA_UPD;
  1039. case ARM_AM::db: return ARM::VLDMSDB_UPD;
  1040. }
  1041. case ARM::VLDMDIA:
  1042. switch (Mode) {
  1043. default: llvm_unreachable("Unhandled submode!");
  1044. case ARM_AM::ia: return ARM::VLDMDIA_UPD;
  1045. case ARM_AM::db: return ARM::VLDMDDB_UPD;
  1046. }
  1047. case ARM::VSTMSIA:
  1048. switch (Mode) {
  1049. default: llvm_unreachable("Unhandled submode!");
  1050. case ARM_AM::ia: return ARM::VSTMSIA_UPD;
  1051. case ARM_AM::db: return ARM::VSTMSDB_UPD;
  1052. }
  1053. case ARM::VSTMDIA:
  1054. switch (Mode) {
  1055. default: llvm_unreachable("Unhandled submode!");
  1056. case ARM_AM::ia: return ARM::VSTMDIA_UPD;
  1057. case ARM_AM::db: return ARM::VSTMDDB_UPD;
  1058. }
  1059. }
  1060. }
  1061. /// Check if the given instruction increments or decrements a register and
  1062. /// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
  1063. /// generated by the instruction are possibly read as well.
  1064. static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
  1065. ARMCC::CondCodes Pred, unsigned PredReg) {
  1066. bool CheckCPSRDef;
  1067. int Scale;
  1068. switch (MI.getOpcode()) {
  1069. case ARM::tADDi8: Scale = 4; CheckCPSRDef = true; break;
  1070. case ARM::tSUBi8: Scale = -4; CheckCPSRDef = true; break;
  1071. case ARM::t2SUBri:
  1072. case ARM::SUBri: Scale = -1; CheckCPSRDef = true; break;
  1073. case ARM::t2ADDri:
  1074. case ARM::ADDri: Scale = 1; CheckCPSRDef = true; break;
  1075. case ARM::tADDspi: Scale = 4; CheckCPSRDef = false; break;
  1076. case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
  1077. default: return 0;
  1078. }
  1079. unsigned MIPredReg;
  1080. if (MI.getOperand(0).getReg() != Reg ||
  1081. MI.getOperand(1).getReg() != Reg ||
  1082. getInstrPredicate(MI, MIPredReg) != Pred ||
  1083. MIPredReg != PredReg)
  1084. return 0;
  1085. if (CheckCPSRDef && definesCPSR(MI))
  1086. return 0;
  1087. return MI.getOperand(2).getImm() * Scale;
  1088. }
  1089. /// Searches for an increment or decrement of \p Reg before \p MBBI.
  1090. static MachineBasicBlock::iterator
  1091. findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
  1092. ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  1093. Offset = 0;
  1094. MachineBasicBlock &MBB = *MBBI->getParent();
  1095. MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  1096. MachineBasicBlock::iterator EndMBBI = MBB.end();
  1097. if (MBBI == BeginMBBI)
  1098. return EndMBBI;
  1099. // Skip debug values.
  1100. MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
  1101. while (PrevMBBI->isDebugInstr() && PrevMBBI != BeginMBBI)
  1102. --PrevMBBI;
  1103. Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
  1104. return Offset == 0 ? EndMBBI : PrevMBBI;
  1105. }
  1106. /// Searches for a increment or decrement of \p Reg after \p MBBI.
  1107. static MachineBasicBlock::iterator
  1108. findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
  1109. ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  1110. Offset = 0;
  1111. MachineBasicBlock &MBB = *MBBI->getParent();
  1112. MachineBasicBlock::iterator EndMBBI = MBB.end();
  1113. MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
  1114. // Skip debug values.
  1115. while (NextMBBI != EndMBBI && NextMBBI->isDebugInstr())
  1116. ++NextMBBI;
  1117. if (NextMBBI == EndMBBI)
  1118. return EndMBBI;
  1119. Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
  1120. return Offset == 0 ? EndMBBI : NextMBBI;
  1121. }
  1122. /// Fold proceeding/trailing inc/dec of base register into the
  1123. /// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
  1124. ///
  1125. /// stmia rn, <ra, rb, rc>
  1126. /// rn := rn + 4 * 3;
  1127. /// =>
  1128. /// stmia rn!, <ra, rb, rc>
  1129. ///
  1130. /// rn := rn - 4 * 3;
  1131. /// ldmia rn, <ra, rb, rc>
  1132. /// =>
  1133. /// ldmdb rn!, <ra, rb, rc>
  1134. bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  1135. // Thumb1 is already using updating loads/stores.
  1136. if (isThumb1) return false;
  1137. const MachineOperand &BaseOP = MI->getOperand(0);
  1138. unsigned Base = BaseOP.getReg();
  1139. bool BaseKill = BaseOP.isKill();
  1140. unsigned PredReg = 0;
  1141. ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  1142. unsigned Opcode = MI->getOpcode();
  1143. DebugLoc DL = MI->getDebugLoc();
  1144. // Can't use an updating ld/st if the base register is also a dest
  1145. // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  1146. for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
  1147. if (MI->getOperand(i).getReg() == Base)
  1148. return false;
  1149. int Bytes = getLSMultipleTransferSize(MI);
  1150. MachineBasicBlock &MBB = *MI->getParent();
  1151. MachineBasicBlock::iterator MBBI(MI);
  1152. int Offset;
  1153. MachineBasicBlock::iterator MergeInstr
  1154. = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  1155. ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
  1156. if (Mode == ARM_AM::ia && Offset == -Bytes) {
  1157. Mode = ARM_AM::db;
  1158. } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
  1159. Mode = ARM_AM::da;
  1160. } else {
  1161. MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
  1162. if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
  1163. ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) {
  1164. // We couldn't find an inc/dec to merge. But if the base is dead, we
  1165. // can still change to a writeback form as that will save us 2 bytes
  1166. // of code size. It can create WAW hazards though, so only do it if
  1167. // we're minimizing code size.
  1168. if (!STI->hasMinSize() || !BaseKill)
  1169. return false;
  1170. bool HighRegsUsed = false;
  1171. for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
  1172. if (MI->getOperand(i).getReg() >= ARM::R8) {
  1173. HighRegsUsed = true;
  1174. break;
  1175. }
  1176. if (!HighRegsUsed)
  1177. MergeInstr = MBB.end();
  1178. else
  1179. return false;
  1180. }
  1181. }
  1182. if (MergeInstr != MBB.end())
  1183. MBB.erase(MergeInstr);
  1184. unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  1185. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
  1186. .addReg(Base, getDefRegState(true)) // WB base register
  1187. .addReg(Base, getKillRegState(BaseKill))
  1188. .addImm(Pred).addReg(PredReg);
  1189. // Transfer the rest of operands.
  1190. for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
  1191. MIB.add(MI->getOperand(OpNum));
  1192. // Transfer memoperands.
  1193. MIB.setMemRefs(MI->memoperands());
  1194. MBB.erase(MBBI);
  1195. return true;
  1196. }
  1197. static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
  1198. ARM_AM::AddrOpc Mode) {
  1199. switch (Opc) {
  1200. case ARM::LDRi12:
  1201. return ARM::LDR_PRE_IMM;
  1202. case ARM::STRi12:
  1203. return ARM::STR_PRE_IMM;
  1204. case ARM::VLDRS:
  1205. return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  1206. case ARM::VLDRD:
  1207. return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  1208. case ARM::VSTRS:
  1209. return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  1210. case ARM::VSTRD:
  1211. return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  1212. case ARM::t2LDRi8:
  1213. case ARM::t2LDRi12:
  1214. return ARM::t2LDR_PRE;
  1215. case ARM::t2STRi8:
  1216. case ARM::t2STRi12:
  1217. return ARM::t2STR_PRE;
  1218. default: llvm_unreachable("Unhandled opcode!");
  1219. }
  1220. }
  1221. static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
  1222. ARM_AM::AddrOpc Mode) {
  1223. switch (Opc) {
  1224. case ARM::LDRi12:
  1225. return ARM::LDR_POST_IMM;
  1226. case ARM::STRi12:
  1227. return ARM::STR_POST_IMM;
  1228. case ARM::VLDRS:
  1229. return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  1230. case ARM::VLDRD:
  1231. return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  1232. case ARM::VSTRS:
  1233. return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  1234. case ARM::VSTRD:
  1235. return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  1236. case ARM::t2LDRi8:
  1237. case ARM::t2LDRi12:
  1238. return ARM::t2LDR_POST;
  1239. case ARM::t2STRi8:
  1240. case ARM::t2STRi12:
  1241. return ARM::t2STR_POST;
  1242. default: llvm_unreachable("Unhandled opcode!");
  1243. }
  1244. }
  1245. /// Fold proceeding/trailing inc/dec of base register into the
  1246. /// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
  1247. bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  1248. // Thumb1 doesn't have updating LDR/STR.
  1249. // FIXME: Use LDM/STM with single register instead.
  1250. if (isThumb1) return false;
  1251. unsigned Base = getLoadStoreBaseOp(*MI).getReg();
  1252. bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  1253. unsigned Opcode = MI->getOpcode();
  1254. DebugLoc DL = MI->getDebugLoc();
  1255. bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
  1256. Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  1257. bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  1258. if (isi32Load(Opcode) || isi32Store(Opcode))
  1259. if (MI->getOperand(2).getImm() != 0)
  1260. return false;
  1261. if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
  1262. return false;
  1263. // Can't do the merge if the destination register is the same as the would-be
  1264. // writeback register.
  1265. if (MI->getOperand(0).getReg() == Base)
  1266. return false;
  1267. unsigned PredReg = 0;
  1268. ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  1269. int Bytes = getLSMultipleTransferSize(MI);
  1270. MachineBasicBlock &MBB = *MI->getParent();
  1271. MachineBasicBlock::iterator MBBI(MI);
  1272. int Offset;
  1273. MachineBasicBlock::iterator MergeInstr
  1274. = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  1275. unsigned NewOpc;
  1276. if (!isAM5 && Offset == Bytes) {
  1277. NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
  1278. } else if (Offset == -Bytes) {
  1279. NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
  1280. } else {
  1281. MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
  1282. if (Offset == Bytes) {
  1283. NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
  1284. } else if (!isAM5 && Offset == -Bytes) {
  1285. NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
  1286. } else
  1287. return false;
  1288. }
  1289. MBB.erase(MergeInstr);
  1290. ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;
  1291. bool isLd = isLoadSingle(Opcode);
  1292. if (isAM5) {
  1293. // VLDM[SD]_UPD, VSTM[SD]_UPD
  1294. // (There are no base-updating versions of VLDR/VSTR instructions, but the
  1295. // updating load/store-multiple instructions can be used with only one
  1296. // register.)
  1297. MachineOperand &MO = MI->getOperand(0);
  1298. BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
  1299. .addReg(Base, getDefRegState(true)) // WB base register
  1300. .addReg(Base, getKillRegState(isLd ? BaseKill : false))
  1301. .addImm(Pred).addReg(PredReg)
  1302. .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
  1303. getKillRegState(MO.isKill())))
  1304. .cloneMemRefs(*MI);
  1305. } else if (isLd) {
  1306. if (isAM2) {
  1307. // LDR_PRE, LDR_POST
  1308. if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
  1309. BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
  1310. .addReg(Base, RegState::Define)
  1311. .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg)
  1312. .cloneMemRefs(*MI);
  1313. } else {
  1314. int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  1315. BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
  1316. .addReg(Base, RegState::Define)
  1317. .addReg(Base)
  1318. .addReg(0)
  1319. .addImm(Imm)
  1320. .add(predOps(Pred, PredReg))
  1321. .cloneMemRefs(*MI);
  1322. }
  1323. } else {
  1324. // t2LDR_PRE, t2LDR_POST
  1325. BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
  1326. .addReg(Base, RegState::Define)
  1327. .addReg(Base)
  1328. .addImm(Offset)
  1329. .add(predOps(Pred, PredReg))
  1330. .cloneMemRefs(*MI);
  1331. }
  1332. } else {
  1333. MachineOperand &MO = MI->getOperand(0);
  1334. // FIXME: post-indexed stores use am2offset_imm, which still encodes
  1335. // the vestigal zero-reg offset register. When that's fixed, this clause
  1336. // can be removed entirely.
  1337. if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
  1338. int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
  1339. // STR_PRE, STR_POST
  1340. BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
  1341. .addReg(MO.getReg(), getKillRegState(MO.isKill()))
  1342. .addReg(Base)
  1343. .addReg(0)
  1344. .addImm(Imm)
  1345. .add(predOps(Pred, PredReg))
  1346. .cloneMemRefs(*MI);
  1347. } else {
  1348. // t2STR_PRE, t2STR_POST
  1349. BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
  1350. .addReg(MO.getReg(), getKillRegState(MO.isKill()))
  1351. .addReg(Base)
  1352. .addImm(Offset)
  1353. .add(predOps(Pred, PredReg))
  1354. .cloneMemRefs(*MI);
  1355. }
  1356. }
  1357. MBB.erase(MBBI);
  1358. return true;
  1359. }
  1360. bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
  1361. unsigned Opcode = MI.getOpcode();
  1362. assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
  1363. "Must have t2STRDi8 or t2LDRDi8");
  1364. if (MI.getOperand(3).getImm() != 0)
  1365. return false;
  1366. // Behaviour for writeback is undefined if base register is the same as one
  1367. // of the others.
  1368. const MachineOperand &BaseOp = MI.getOperand(2);
  1369. unsigned Base = BaseOp.getReg();
  1370. const MachineOperand &Reg0Op = MI.getOperand(0);
  1371. const MachineOperand &Reg1Op = MI.getOperand(1);
  1372. if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
  1373. return false;
  1374. unsigned PredReg;
  1375. ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  1376. MachineBasicBlock::iterator MBBI(MI);
  1377. MachineBasicBlock &MBB = *MI.getParent();
  1378. int Offset;
  1379. MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
  1380. PredReg, Offset);
  1381. unsigned NewOpc;
  1382. if (Offset == 8 || Offset == -8) {
  1383. NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
  1384. } else {
  1385. MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
  1386. if (Offset == 8 || Offset == -8) {
  1387. NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
  1388. } else
  1389. return false;
  1390. }
  1391. MBB.erase(MergeInstr);
  1392. DebugLoc DL = MI.getDebugLoc();
  1393. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  1394. if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
  1395. MIB.add(Reg0Op).add(Reg1Op).addReg(BaseOp.getReg(), RegState::Define);
  1396. } else {
  1397. assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
  1398. MIB.addReg(BaseOp.getReg(), RegState::Define).add(Reg0Op).add(Reg1Op);
  1399. }
  1400. MIB.addReg(BaseOp.getReg(), RegState::Kill)
  1401. .addImm(Offset).addImm(Pred).addReg(PredReg);
  1402. assert(TII->get(Opcode).getNumOperands() == 6 &&
  1403. TII->get(NewOpc).getNumOperands() == 7 &&
  1404. "Unexpected number of operands in Opcode specification.");
  1405. // Transfer implicit operands.
  1406. for (const MachineOperand &MO : MI.implicit_operands())
  1407. MIB.add(MO);
  1408. MIB.cloneMemRefs(MI);
  1409. MBB.erase(MBBI);
  1410. return true;
  1411. }
  1412. /// Returns true if instruction is a memory operation that this pass is capable
  1413. /// of operating on.
  1414. static bool isMemoryOp(const MachineInstr &MI) {
  1415. unsigned Opcode = MI.getOpcode();
  1416. switch (Opcode) {
  1417. case ARM::VLDRS:
  1418. case ARM::VSTRS:
  1419. case ARM::VLDRD:
  1420. case ARM::VSTRD:
  1421. case ARM::LDRi12:
  1422. case ARM::STRi12:
  1423. case ARM::tLDRi:
  1424. case ARM::tSTRi:
  1425. case ARM::tLDRspi:
  1426. case ARM::tSTRspi:
  1427. case ARM::t2LDRi8:
  1428. case ARM::t2LDRi12:
  1429. case ARM::t2STRi8:
  1430. case ARM::t2STRi12:
  1431. break;
  1432. default:
  1433. return false;
  1434. }
  1435. if (!MI.getOperand(1).isReg())
  1436. return false;
  1437. // When no memory operands are present, conservatively assume unaligned,
  1438. // volatile, unfoldable.
  1439. if (!MI.hasOneMemOperand())
  1440. return false;
  1441. const MachineMemOperand &MMO = **MI.memoperands_begin();
  1442. // Don't touch volatile memory accesses - we may be changing their order.
  1443. // TODO: We could allow unordered and monotonic atomics here, but we need to
  1444. // make sure the resulting ldm/stm is correctly marked as atomic.
  1445. if (MMO.isVolatile() || MMO.isAtomic())
  1446. return false;
  1447. // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  1448. // not.
  1449. if (MMO.getAlignment() < 4)
  1450. return false;
  1451. // str <undef> could probably be eliminated entirely, but for now we just want
  1452. // to avoid making a mess of it.
  1453. // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  1454. if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
  1455. return false;
  1456. // Likewise don't mess with references to undefined addresses.
  1457. if (MI.getOperand(1).isUndef())
  1458. return false;
  1459. return true;
  1460. }
  1461. static void InsertLDR_STR(MachineBasicBlock &MBB,
  1462. MachineBasicBlock::iterator &MBBI, int Offset,
  1463. bool isDef, unsigned NewOpc, unsigned Reg,
  1464. bool RegDeadKill, bool RegUndef, unsigned BaseReg,
  1465. bool BaseKill, bool BaseUndef, ARMCC::CondCodes Pred,
  1466. unsigned PredReg, const TargetInstrInfo *TII,
  1467. MachineInstr *MI) {
  1468. if (isDef) {
  1469. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
  1470. TII->get(NewOpc))
  1471. .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
  1472. .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
  1473. MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  1474. // FIXME: This is overly conservative; the new instruction accesses 4
  1475. // bytes, not 8.
  1476. MIB.cloneMemRefs(*MI);
  1477. } else {
  1478. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
  1479. TII->get(NewOpc))
  1480. .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
  1481. .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
  1482. MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  1483. // FIXME: This is overly conservative; the new instruction accesses 4
  1484. // bytes, not 8.
  1485. MIB.cloneMemRefs(*MI);
  1486. }
  1487. }
  1488. bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
  1489. MachineBasicBlock::iterator &MBBI) {
  1490. MachineInstr *MI = &*MBBI;
  1491. unsigned Opcode = MI->getOpcode();
  1492. // FIXME: Code/comments below check Opcode == t2STRDi8, but this check returns
  1493. // if we see this opcode.
  1494. if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
  1495. return false;
  1496. const MachineOperand &BaseOp = MI->getOperand(2);
  1497. unsigned BaseReg = BaseOp.getReg();
  1498. unsigned EvenReg = MI->getOperand(0).getReg();
  1499. unsigned OddReg = MI->getOperand(1).getReg();
  1500. unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  1501. unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);
  1502. // ARM errata 602117: LDRD with base in list may result in incorrect base
  1503. // register when interrupted or faulted.
  1504. bool Errata602117 = EvenReg == BaseReg &&
  1505. (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  1506. // ARM LDRD/STRD needs consecutive registers.
  1507. bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
  1508. (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);
  1509. if (!Errata602117 && !NonConsecutiveRegs)
  1510. return false;
  1511. bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  1512. bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  1513. bool EvenDeadKill = isLd ?
  1514. MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  1515. bool EvenUndef = MI->getOperand(0).isUndef();
  1516. bool OddDeadKill = isLd ?
  1517. MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  1518. bool OddUndef = MI->getOperand(1).isUndef();
  1519. bool BaseKill = BaseOp.isKill();
  1520. bool BaseUndef = BaseOp.isUndef();
  1521. assert((isT2 || MI->getOperand(3).getReg() == ARM::NoRegister) &&
  1522. "register offset not handled below");
  1523. int OffImm = getMemoryOpOffset(*MI);
  1524. unsigned PredReg = 0;
  1525. ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  1526. if (OddRegNum > EvenRegNum && OffImm == 0) {
  1527. // Ascending register numbers and no offset. It's safe to change it to a
  1528. // ldm or stm.
  1529. unsigned NewOpc = (isLd)
  1530. ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
  1531. : (isT2 ? ARM::t2STMIA : ARM::STMIA);
  1532. if (isLd) {
  1533. BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
  1534. .addReg(BaseReg, getKillRegState(BaseKill))
  1535. .addImm(Pred).addReg(PredReg)
  1536. .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
  1537. .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill))
  1538. .cloneMemRefs(*MI);
  1539. ++NumLDRD2LDM;
  1540. } else {
  1541. BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
  1542. .addReg(BaseReg, getKillRegState(BaseKill))
  1543. .addImm(Pred).addReg(PredReg)
  1544. .addReg(EvenReg,
  1545. getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
  1546. .addReg(OddReg,
  1547. getKillRegState(OddDeadKill) | getUndefRegState(OddUndef))
  1548. .cloneMemRefs(*MI);
  1549. ++NumSTRD2STM;
  1550. }
  1551. } else {
  1552. // Split into two instructions.
  1553. unsigned NewOpc = (isLd)
  1554. ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
  1555. : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
  1556. // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
  1557. // so adjust and use t2LDRi12 here for that.
  1558. unsigned NewOpc2 = (isLd)
  1559. ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
  1560. : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
  1561. // If this is a load, make sure the first load does not clobber the base
  1562. // register before the second load reads it.
  1563. if (isLd && TRI->regsOverlap(EvenReg, BaseReg)) {
  1564. assert(!TRI->regsOverlap(OddReg, BaseReg));
  1565. InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
  1566. false, BaseReg, false, BaseUndef, Pred, PredReg, TII, MI);
  1567. InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
  1568. false, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
  1569. MI);
  1570. } else {
  1571. if (OddReg == EvenReg && EvenDeadKill) {
  1572. // If the two source operands are the same, the kill marker is
  1573. // probably on the first one. e.g.
  1574. // t2STRDi8 killed %r5, %r5, killed %r9, 0, 14, %reg0
  1575. EvenDeadKill = false;
  1576. OddDeadKill = true;
  1577. }
  1578. // Never kill the base register in the first instruction.
  1579. if (EvenReg == BaseReg)
  1580. EvenDeadKill = false;
  1581. InsertLDR_STR(MBB, MBBI, OffImm, isLd, NewOpc, EvenReg, EvenDeadKill,
  1582. EvenUndef, BaseReg, false, BaseUndef, Pred, PredReg, TII,
  1583. MI);
  1584. InsertLDR_STR(MBB, MBBI, OffImm + 4, isLd, NewOpc2, OddReg, OddDeadKill,
  1585. OddUndef, BaseReg, BaseKill, BaseUndef, Pred, PredReg, TII,
  1586. MI);
  1587. }
  1588. if (isLd)
  1589. ++NumLDRD2LDR;
  1590. else
  1591. ++NumSTRD2STR;
  1592. }
  1593. MBBI = MBB.erase(MBBI);
  1594. return true;
  1595. }
  1596. /// An optimization pass to turn multiple LDR / STR ops of the same base and
  1597. /// incrementing offset into LDM / STM ops.
  1598. bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  1599. MemOpQueue MemOps;
  1600. unsigned CurrBase = 0;
  1601. unsigned CurrOpc = ~0u;
  1602. ARMCC::CondCodes CurrPred = ARMCC::AL;
  1603. unsigned Position = 0;
  1604. assert(Candidates.size() == 0);
  1605. assert(MergeBaseCandidates.size() == 0);
  1606. LiveRegsValid = false;
  1607. for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
  1608. I = MBBI) {
  1609. // The instruction in front of the iterator is the one we look at.
  1610. MBBI = std::prev(I);
  1611. if (FixInvalidRegPairOp(MBB, MBBI))
  1612. continue;
  1613. ++Position;
  1614. if (isMemoryOp(*MBBI)) {
  1615. unsigned Opcode = MBBI->getOpcode();
  1616. const MachineOperand &MO = MBBI->getOperand(0);
  1617. unsigned Reg = MO.getReg();
  1618. unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
  1619. unsigned PredReg = 0;
  1620. ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
  1621. int Offset = getMemoryOpOffset(*MBBI);
  1622. if (CurrBase == 0) {
  1623. // Start of a new chain.
  1624. CurrBase = Base;
  1625. CurrOpc = Opcode;
  1626. CurrPred = Pred;
  1627. MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
  1628. continue;
  1629. }
  1630. // Note: No need to match PredReg in the next if.
  1631. if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
  1632. // Watch out for:
  1633. // r4 := ldr [r0, #8]
  1634. // r4 := ldr [r0, #4]
  1635. // or
  1636. // r0 := ldr [r0]
  1637. // If a load overrides the base register or a register loaded by
  1638. // another load in our chain, we cannot take this instruction.
  1639. bool Overlap = false;
  1640. if (isLoadSingle(Opcode)) {
  1641. Overlap = (Base == Reg);
  1642. if (!Overlap) {
  1643. for (const MemOpQueueEntry &E : MemOps) {
  1644. if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
  1645. Overlap = true;
  1646. break;
  1647. }
  1648. }
  1649. }
  1650. }
  1651. if (!Overlap) {
  1652. // Check offset and sort memory operation into the current chain.
  1653. if (Offset > MemOps.back().Offset) {
  1654. MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
  1655. continue;
  1656. } else {
  1657. MemOpQueue::iterator MI, ME;
  1658. for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
  1659. if (Offset < MI->Offset) {
  1660. // Found a place to insert.
  1661. break;
  1662. }
  1663. if (Offset == MI->Offset) {
  1664. // Collision, abort.
  1665. MI = ME;
  1666. break;
  1667. }
  1668. }
  1669. if (MI != MemOps.end()) {
  1670. MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position));
  1671. continue;
  1672. }
  1673. }
  1674. }
  1675. }
  1676. // Don't advance the iterator; The op will start a new chain next.
  1677. MBBI = I;
  1678. --Position;
  1679. // Fallthrough to look into existing chain.
  1680. } else if (MBBI->isDebugInstr()) {
  1681. continue;
  1682. } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
  1683. MBBI->getOpcode() == ARM::t2STRDi8) {
  1684. // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD instructions
  1685. // remember them because we may still be able to merge add/sub into them.
  1686. MergeBaseCandidates.push_back(&*MBBI);
  1687. }
  1688. // If we are here then the chain is broken; Extract candidates for a merge.
  1689. if (MemOps.size() > 0) {
  1690. FormCandidates(MemOps);
  1691. // Reset for the next chain.
  1692. CurrBase = 0;
  1693. CurrOpc = ~0u;
  1694. CurrPred = ARMCC::AL;
  1695. MemOps.clear();
  1696. }
  1697. }
  1698. if (MemOps.size() > 0)
  1699. FormCandidates(MemOps);
  1700. // Sort candidates so they get processed from end to begin of the basic
  1701. // block later; This is necessary for liveness calculation.
  1702. auto LessThan = [](const MergeCandidate* M0, const MergeCandidate *M1) {
  1703. return M0->InsertPos < M1->InsertPos;
  1704. };
  1705. llvm::sort(Candidates, LessThan);
  1706. // Go through list of candidates and merge.
  1707. bool Changed = false;
  1708. for (const MergeCandidate *Candidate : Candidates) {
  1709. if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
  1710. MachineInstr *Merged = MergeOpsUpdate(*Candidate);
  1711. // Merge preceding/trailing base inc/dec into the merged op.
  1712. if (Merged) {
  1713. Changed = true;
  1714. unsigned Opcode = Merged->getOpcode();
  1715. if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
  1716. MergeBaseUpdateLSDouble(*Merged);
  1717. else
  1718. MergeBaseUpdateLSMultiple(Merged);
  1719. } else {
  1720. for (MachineInstr *MI : Candidate->Instrs) {
  1721. if (MergeBaseUpdateLoadStore(MI))
  1722. Changed = true;
  1723. }
  1724. }
  1725. } else {
  1726. assert(Candidate->Instrs.size() == 1);
  1727. if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
  1728. Changed = true;
  1729. }
  1730. }
  1731. Candidates.clear();
  1732. // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
  1733. for (MachineInstr *MI : MergeBaseCandidates)
  1734. MergeBaseUpdateLSDouble(*MI);
  1735. MergeBaseCandidates.clear();
  1736. return Changed;
  1737. }
  1738. /// If this is a exit BB, try merging the return ops ("bx lr" and "mov pc, lr")
  1739. /// into the preceding stack restore so it directly restore the value of LR
  1740. /// into pc.
  1741. /// ldmfd sp!, {..., lr}
  1742. /// bx lr
  1743. /// or
  1744. /// ldmfd sp!, {..., lr}
  1745. /// mov pc, lr
  1746. /// =>
  1747. /// ldmfd sp!, {..., pc}
  1748. bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  1749. // Thumb1 LDM doesn't allow high registers.
  1750. if (isThumb1) return false;
  1751. if (MBB.empty()) return false;
  1752. MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  1753. if (MBBI != MBB.begin() && MBBI != MBB.end() &&
  1754. (MBBI->getOpcode() == ARM::BX_RET ||
  1755. MBBI->getOpcode() == ARM::tBX_RET ||
  1756. MBBI->getOpcode() == ARM::MOVPCLR)) {
  1757. MachineBasicBlock::iterator PrevI = std::prev(MBBI);
  1758. // Ignore any debug instructions.
  1759. while (PrevI->isDebugInstr() && PrevI != MBB.begin())
  1760. --PrevI;
  1761. MachineInstr &PrevMI = *PrevI;
  1762. unsigned Opcode = PrevMI.getOpcode();
  1763. if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
  1764. Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
  1765. Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
  1766. MachineOperand &MO = PrevMI.getOperand(PrevMI.getNumOperands() - 1);
  1767. if (MO.getReg() != ARM::LR)
  1768. return false;
  1769. unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
  1770. assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
  1771. Opcode == ARM::LDMIA_UPD) && "Unsupported multiple load-return!");
  1772. PrevMI.setDesc(TII->get(NewOpc));
  1773. MO.setReg(ARM::PC);
  1774. PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI);
  1775. MBB.erase(MBBI);
  1776. // We now restore LR into PC so it is not live-out of the return block
  1777. // anymore: Clear the CSI Restored bit.
  1778. MachineFrameInfo &MFI = MBB.getParent()->getFrameInfo();
  1779. // CSI should be fixed after PrologEpilog Insertion
  1780. assert(MFI.isCalleeSavedInfoValid() && "CSI should be valid");
  1781. for (CalleeSavedInfo &Info : MFI.getCalleeSavedInfo()) {
  1782. if (Info.getReg() == ARM::LR) {
  1783. Info.setRestored(false);
  1784. break;
  1785. }
  1786. }
  1787. return true;
  1788. }
  1789. }
  1790. return false;
  1791. }
  1792. bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
  1793. MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  1794. if (MBBI == MBB.begin() || MBBI == MBB.end() ||
  1795. MBBI->getOpcode() != ARM::tBX_RET)
  1796. return false;
  1797. MachineBasicBlock::iterator Prev = MBBI;
  1798. --Prev;
  1799. if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
  1800. return false;
  1801. for (auto Use : Prev->uses())
  1802. if (Use.isKill()) {
  1803. assert(STI->hasV4TOps());
  1804. BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::tBX))
  1805. .addReg(Use.getReg(), RegState::Kill)
  1806. .add(predOps(ARMCC::AL))
  1807. .copyImplicitOps(*MBBI);
  1808. MBB.erase(MBBI);
  1809. MBB.erase(Prev);
  1810. return true;
  1811. }
  1812. llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
  1813. }
  1814. bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  1815. if (skipFunction(Fn.getFunction()))
  1816. return false;
  1817. MF = &Fn;
  1818. STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  1819. TL = STI->getTargetLowering();
  1820. AFI = Fn.getInfo<ARMFunctionInfo>();
  1821. TII = STI->getInstrInfo();
  1822. TRI = STI->getRegisterInfo();
  1823. RegClassInfoValid = false;
  1824. isThumb2 = AFI->isThumb2Function();
  1825. isThumb1 = AFI->isThumbFunction() && !isThumb2;
  1826. bool Modified = false;
  1827. for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
  1828. ++MFI) {
  1829. MachineBasicBlock &MBB = *MFI;
  1830. Modified |= LoadStoreMultipleOpti(MBB);
  1831. if (STI->hasV5TOps())
  1832. Modified |= MergeReturnIntoLDM(MBB);
  1833. if (isThumb1)
  1834. Modified |= CombineMovBx(MBB);
  1835. }
  1836. Allocator.DestroyAll();
  1837. return Modified;
  1838. }
  // Human-readable name of the pre-register-allocation load / store pass.
  #define ARM_PREALLOC_LOAD_STORE_OPT_NAME                                       \
    "ARM pre- register allocation load / store optimization pass"
  1841. namespace {
  1842. /// Pre- register allocation pass that move load / stores from consecutive
  1843. /// locations close to make it more likely they will be combined later.
  1844. struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass{
  1845. static char ID;
  1846. AliasAnalysis *AA;
  1847. const DataLayout *TD;
  1848. const TargetInstrInfo *TII;
  1849. const TargetRegisterInfo *TRI;
  1850. const ARMSubtarget *STI;
  1851. MachineRegisterInfo *MRI;
  1852. MachineFunction *MF;
  1853. ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}
  1854. bool runOnMachineFunction(MachineFunction &Fn) override;
  1855. StringRef getPassName() const override {
  1856. return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
  1857. }
  1858. void getAnalysisUsage(AnalysisUsage &AU) const override {
  1859. AU.addRequired<AAResultsWrapperPass>();
  1860. MachineFunctionPass::getAnalysisUsage(AU);
  1861. }
  1862. private:
  1863. bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
  1864. unsigned &NewOpc, unsigned &EvenReg,
  1865. unsigned &OddReg, unsigned &BaseReg,
  1866. int &Offset,
  1867. unsigned &PredReg, ARMCC::CondCodes &Pred,
  1868. bool &isT2);
  1869. bool RescheduleOps(MachineBasicBlock *MBB,
  1870. SmallVectorImpl<MachineInstr *> &Ops,
  1871. unsigned Base, bool isLd,
  1872. DenseMap<MachineInstr*, unsigned> &MI2LocMap);
  1873. bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  1874. };
  1875. } // end anonymous namespace
  1876. char ARMPreAllocLoadStoreOpt::ID = 0;
  1877. INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
  1878. ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)
  1879. // Limit the number of instructions to be rescheduled.
  1880. // FIXME: tune this limit, and/or come up with some better heuristics.
  1881. static cl::opt<unsigned> InstReorderLimit("arm-prera-ldst-opt-reorder-limit",
  1882. cl::init(8), cl::Hidden);
  1883. bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  1884. if (AssumeMisalignedLoadStores || skipFunction(Fn.getFunction()))
  1885. return false;
  1886. TD = &Fn.getDataLayout();
  1887. STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  1888. TII = STI->getInstrInfo();
  1889. TRI = STI->getRegisterInfo();
  1890. MRI = &Fn.getRegInfo();
  1891. MF = &Fn;
  1892. AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  1893. bool Modified = false;
  1894. for (MachineBasicBlock &MFI : Fn)
  1895. Modified |= RescheduleLoadStoreInstrs(&MFI);
  1896. return Modified;
  1897. }
  1898. static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
  1899. MachineBasicBlock::iterator I,
  1900. MachineBasicBlock::iterator E,
  1901. SmallPtrSetImpl<MachineInstr*> &MemOps,
  1902. SmallSet<unsigned, 4> &MemRegs,
  1903. const TargetRegisterInfo *TRI,
  1904. AliasAnalysis *AA) {
  1905. // Are there stores / loads / calls between them?
  1906. SmallSet<unsigned, 4> AddedRegPressure;
  1907. while (++I != E) {
  1908. if (I->isDebugInstr() || MemOps.count(&*I))
  1909. continue;
  1910. if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
  1911. return false;
  1912. if (I->mayStore() || (!isLd && I->mayLoad()))
  1913. for (MachineInstr *MemOp : MemOps)
  1914. if (I->mayAlias(AA, *MemOp, /*UseTBAA*/ false))
  1915. return false;
  1916. for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
  1917. MachineOperand &MO = I->getOperand(j);
  1918. if (!MO.isReg())
  1919. continue;
  1920. unsigned Reg = MO.getReg();
  1921. if (MO.isDef() && TRI->regsOverlap(Reg, Base))
  1922. return false;
  1923. if (Reg != Base && !MemRegs.count(Reg))
  1924. AddedRegPressure.insert(Reg);
  1925. }
  1926. }
  1927. // Estimate register pressure increase due to the transformation.
  1928. if (MemRegs.size() <= 4)
  1929. // Ok if we are moving small number of instructions.
  1930. return true;
  1931. return AddedRegPressure.size() <= MemRegs.size() * 2;
  1932. }
  1933. bool
  1934. ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
  1935. DebugLoc &dl, unsigned &NewOpc,
  1936. unsigned &FirstReg,
  1937. unsigned &SecondReg,
  1938. unsigned &BaseReg, int &Offset,
  1939. unsigned &PredReg,
  1940. ARMCC::CondCodes &Pred,
  1941. bool &isT2) {
  1942. // Make sure we're allowed to generate LDRD/STRD.
  1943. if (!STI->hasV5TEOps())
  1944. return false;
  1945. // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  1946. unsigned Scale = 1;
  1947. unsigned Opcode = Op0->getOpcode();
  1948. if (Opcode == ARM::LDRi12) {
  1949. NewOpc = ARM::LDRD;
  1950. } else if (Opcode == ARM::STRi12) {
  1951. NewOpc = ARM::STRD;
  1952. } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
  1953. NewOpc = ARM::t2LDRDi8;
  1954. Scale = 4;
  1955. isT2 = true;
  1956. } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
  1957. NewOpc = ARM::t2STRDi8;
  1958. Scale = 4;
  1959. isT2 = true;
  1960. } else {
  1961. return false;
  1962. }
  1963. // Make sure the base address satisfies i64 ld / st alignment requirement.
  1964. // At the moment, we ignore the memoryoperand's value.
  1965. // If we want to use AliasAnalysis, we should check it accordingly.
  1966. if (!Op0->hasOneMemOperand() ||
  1967. (*Op0->memoperands_begin())->isVolatile() ||
  1968. (*Op0->memoperands_begin())->isAtomic())
  1969. return false;
  1970. unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  1971. const Function &Func = MF->getFunction();
  1972. unsigned ReqAlign = STI->hasV6Ops()
  1973. ? TD->getABITypeAlignment(Type::getInt64Ty(Func.getContext()))
  1974. : 8; // Pre-v6 need 8-byte align
  1975. if (Align < ReqAlign)
  1976. return false;
  1977. // Then make sure the immediate offset fits.
  1978. int OffImm = getMemoryOpOffset(*Op0);
  1979. if (isT2) {
  1980. int Limit = (1 << 8) * Scale;
  1981. if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
  1982. return false;
  1983. Offset = OffImm;
  1984. } else {
  1985. ARM_AM::AddrOpc AddSub = ARM_AM::add;
  1986. if (OffImm < 0) {
  1987. AddSub = ARM_AM::sub;
  1988. OffImm = - OffImm;
  1989. }
  1990. int Limit = (1 << 8) * Scale;
  1991. if (OffImm >= Limit || (OffImm & (Scale-1)))
  1992. return false;
  1993. Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  1994. }
  1995. FirstReg = Op0->getOperand(0).getReg();
  1996. SecondReg = Op1->getOperand(0).getReg();
  1997. if (FirstReg == SecondReg)
  1998. return false;
  1999. BaseReg = Op0->getOperand(1).getReg();
  2000. Pred = getInstrPredicate(*Op0, PredReg);
  2001. dl = Op0->getDebugLoc();
  2002. return true;
  2003. }
  2004. bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
  2005. SmallVectorImpl<MachineInstr *> &Ops,
  2006. unsigned Base, bool isLd,
  2007. DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  2008. bool RetVal = false;
  2009. // Sort by offset (in reverse order).
  2010. llvm::sort(Ops, [](const MachineInstr *LHS, const MachineInstr *RHS) {
  2011. int LOffset = getMemoryOpOffset(*LHS);
  2012. int ROffset = getMemoryOpOffset(*RHS);
  2013. assert(LHS == RHS || LOffset != ROffset);
  2014. return LOffset > ROffset;
  2015. });
  2016. // The loads / stores of the same base are in order. Scan them from first to
  2017. // last and check for the following:
  2018. // 1. Any def of base.
  2019. // 2. Any gaps.
  2020. while (Ops.size() > 1) {
  2021. unsigned FirstLoc = ~0U;
  2022. unsigned LastLoc = 0;
  2023. MachineInstr *FirstOp = nullptr;
  2024. MachineInstr *LastOp = nullptr;
  2025. int LastOffset = 0;
  2026. unsigned LastOpcode = 0;
  2027. unsigned LastBytes = 0;
  2028. unsigned NumMove = 0;
  2029. for (int i = Ops.size() - 1; i >= 0; --i) {
  2030. // Make sure each operation has the same kind.
  2031. MachineInstr *Op = Ops[i];
  2032. unsigned LSMOpcode
  2033. = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
  2034. if (LastOpcode && LSMOpcode != LastOpcode)
  2035. break;
  2036. // Check that we have a continuous set of offsets.
  2037. int Offset = getMemoryOpOffset(*Op);
  2038. unsigned Bytes = getLSMultipleTransferSize(Op);
  2039. if (LastBytes) {
  2040. if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
  2041. break;
  2042. }
  2043. // Don't try to reschedule too many instructions.
  2044. if (NumMove == InstReorderLimit)
  2045. break;
  2046. // Found a mergable instruction; save information about it.
  2047. ++NumMove;
  2048. LastOffset = Offset;
  2049. LastBytes = Bytes;
  2050. LastOpcode = LSMOpcode;
  2051. unsigned Loc = MI2LocMap[Op];
  2052. if (Loc <= FirstLoc) {
  2053. FirstLoc = Loc;
  2054. FirstOp = Op;
  2055. }
  2056. if (Loc >= LastLoc) {
  2057. LastLoc = Loc;
  2058. LastOp = Op;
  2059. }
  2060. }
  2061. if (NumMove <= 1)
  2062. Ops.pop_back();
  2063. else {
  2064. SmallPtrSet<MachineInstr*, 4> MemOps;
  2065. SmallSet<unsigned, 4> MemRegs;
  2066. for (size_t i = Ops.size() - NumMove, e = Ops.size(); i != e; ++i) {
  2067. MemOps.insert(Ops[i]);
  2068. MemRegs.insert(Ops[i]->getOperand(0).getReg());
  2069. }
  2070. // Be conservative, if the instructions are too far apart, don't
  2071. // move them. We want to limit the increase of register pressure.
  2072. bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
  2073. if (DoMove)
  2074. DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
  2075. MemOps, MemRegs, TRI, AA);
  2076. if (!DoMove) {
  2077. for (unsigned i = 0; i != NumMove; ++i)
  2078. Ops.pop_back();
  2079. } else {
  2080. // This is the new location for the loads / stores.
  2081. MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
  2082. while (InsertPos != MBB->end() &&
  2083. (MemOps.count(&*InsertPos) || InsertPos->isDebugInstr()))
  2084. ++InsertPos;
  2085. // If we are moving a pair of loads / stores, see if it makes sense
  2086. // to try to allocate a pair of registers that can form register pairs.
  2087. MachineInstr *Op0 = Ops.back();
  2088. MachineInstr *Op1 = Ops[Ops.size()-2];
  2089. unsigned FirstReg = 0, SecondReg = 0;
  2090. unsigned BaseReg = 0, PredReg = 0;
  2091. ARMCC::CondCodes Pred = ARMCC::AL;
  2092. bool isT2 = false;
  2093. unsigned NewOpc = 0;
  2094. int Offset = 0;
  2095. DebugLoc dl;
  2096. if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
  2097. FirstReg, SecondReg, BaseReg,
  2098. Offset, PredReg, Pred, isT2)) {
  2099. Ops.pop_back();
  2100. Ops.pop_back();
  2101. const MCInstrDesc &MCID = TII->get(NewOpc);
  2102. const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
  2103. MRI->constrainRegClass(FirstReg, TRC);
  2104. MRI->constrainRegClass(SecondReg, TRC);
  2105. // Form the pair instruction.
  2106. if (isLd) {
  2107. MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
  2108. .addReg(FirstReg, RegState::Define)
  2109. .addReg(SecondReg, RegState::Define)
  2110. .addReg(BaseReg);
  2111. // FIXME: We're converting from LDRi12 to an insn that still
  2112. // uses addrmode2, so we need an explicit offset reg. It should
  2113. // always by reg0 since we're transforming LDRi12s.
  2114. if (!isT2)
  2115. MIB.addReg(0);
  2116. MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  2117. MIB.cloneMergedMemRefs({Op0, Op1});
  2118. LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
  2119. ++NumLDRDFormed;
  2120. } else {
  2121. MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
  2122. .addReg(FirstReg)
  2123. .addReg(SecondReg)
  2124. .addReg(BaseReg);
  2125. // FIXME: We're converting from LDRi12 to an insn that still
  2126. // uses addrmode2, so we need an explicit offset reg. It should
  2127. // always by reg0 since we're transforming STRi12s.
  2128. if (!isT2)
  2129. MIB.addReg(0);
  2130. MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  2131. MIB.cloneMergedMemRefs({Op0, Op1});
  2132. LLVM_DEBUG(dbgs() << "Formed " << *MIB << "\n");
  2133. ++NumSTRDFormed;
  2134. }
  2135. MBB->erase(Op0);
  2136. MBB->erase(Op1);
  2137. if (!isT2) {
  2138. // Add register allocation hints to form register pairs.
  2139. MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
  2140. MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
  2141. }
  2142. } else {
  2143. for (unsigned i = 0; i != NumMove; ++i) {
  2144. MachineInstr *Op = Ops.back();
  2145. Ops.pop_back();
  2146. MBB->splice(InsertPos, MBB, Op);
  2147. }
  2148. }
  2149. NumLdStMoved += NumMove;
  2150. RetVal = true;
  2151. }
  2152. }
  2153. }
  2154. return RetVal;
  2155. }
  2156. bool
  2157. ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  2158. bool RetVal = false;
  2159. DenseMap<MachineInstr*, unsigned> MI2LocMap;
  2160. using MapIt = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>::iterator;
  2161. using Base2InstMap = DenseMap<unsigned, SmallVector<MachineInstr *, 4>>;
  2162. using BaseVec = SmallVector<unsigned, 4>;
  2163. Base2InstMap Base2LdsMap;
  2164. Base2InstMap Base2StsMap;
  2165. BaseVec LdBases;
  2166. BaseVec StBases;
  2167. unsigned Loc = 0;
  2168. MachineBasicBlock::iterator MBBI = MBB->begin();
  2169. MachineBasicBlock::iterator E = MBB->end();
  2170. while (MBBI != E) {
  2171. for (; MBBI != E; ++MBBI) {
  2172. MachineInstr &MI = *MBBI;
  2173. if (MI.isCall() || MI.isTerminator()) {
  2174. // Stop at barriers.
  2175. ++MBBI;
  2176. break;
  2177. }
  2178. if (!MI.isDebugInstr())
  2179. MI2LocMap[&MI] = ++Loc;
  2180. if (!isMemoryOp(MI))
  2181. continue;
  2182. unsigned PredReg = 0;
  2183. if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
  2184. continue;
  2185. int Opc = MI.getOpcode();
  2186. bool isLd = isLoadSingle(Opc);
  2187. unsigned Base = MI.getOperand(1).getReg();
  2188. int Offset = getMemoryOpOffset(MI);
  2189. bool StopHere = false;
  2190. auto FindBases = [&] (Base2InstMap &Base2Ops, BaseVec &Bases) {
  2191. MapIt BI = Base2Ops.find(Base);
  2192. if (BI == Base2Ops.end()) {
  2193. Base2Ops[Base].push_back(&MI);
  2194. Bases.push_back(Base);
  2195. return;
  2196. }
  2197. for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
  2198. if (Offset == getMemoryOpOffset(*BI->second[i])) {
  2199. StopHere = true;
  2200. break;
  2201. }
  2202. }
  2203. if (!StopHere)
  2204. BI->second.push_back(&MI);
  2205. };
  2206. if (isLd)
  2207. FindBases(Base2LdsMap, LdBases);
  2208. else
  2209. FindBases(Base2StsMap, StBases);
  2210. if (StopHere) {
  2211. // Found a duplicate (a base+offset combination that's seen earlier).
  2212. // Backtrack.
  2213. --Loc;
  2214. break;
  2215. }
  2216. }
  2217. // Re-schedule loads.
  2218. for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
  2219. unsigned Base = LdBases[i];
  2220. SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
  2221. if (Lds.size() > 1)
  2222. RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
  2223. }
  2224. // Re-schedule stores.
  2225. for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
  2226. unsigned Base = StBases[i];
  2227. SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
  2228. if (Sts.size() > 1)
  2229. RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
  2230. }
  2231. if (MBBI != E) {
  2232. Base2LdsMap.clear();
  2233. Base2StsMap.clear();
  2234. LdBases.clear();
  2235. StBases.clear();
  2236. }
  2237. }
  2238. return RetVal;
  2239. }
  2240. /// Returns an instance of the load / store optimization pass.
  2241. FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  2242. if (PreAlloc)
  2243. return new ARMPreAllocLoadStoreOpt();
  2244. return new ARMLoadStoreOpt();
  2245. }