//===-- ARMLoadStoreOptimizer.cpp - ARM load / store opt. pass ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains a pass that performs load / store related peephole
/// optimizations. This pass should be run after register allocation.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMISelLowering.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "ThumbRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"

using namespace llvm;

#define DEBUG_TYPE "arm-ldst-opt"

STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
STATISTIC(NumLDRD2LDM, "Number of ldrd instructions turned back into ldm");
STATISTIC(NumSTRD2STM, "Number of strd instructions turned back into stm");
STATISTIC(NumLDRD2LDR, "Number of ldrd instructions turned back into ldr's");
STATISTIC(NumSTRD2STR, "Number of strd instructions turned back into str's");

/// This switch disables formation of double/multi instructions that could
/// potentially lead to (new) alignment traps even with CCR.UNALIGN_TRP
/// disabled. This can be used to create libraries that are robust even when
/// users provoke undefined behaviour by supplying misaligned pointers.
/// \see mayCombineMisaligned()
static cl::opt<bool>
AssumeMisalignedLoadStores("arm-assume-misaligned-load-store", cl::Hidden,
    cl::init(false), cl::desc("Be more conservative in ARM load/store opt"));

#define ARM_LOAD_STORE_OPT_NAME "ARM load / store optimization pass"

namespace {
/// Post-register-allocation pass that combines load / store instructions to
/// form ldm / stm instructions.
struct ARMLoadStoreOpt : public MachineFunctionPass {
  static char ID;
  ARMLoadStoreOpt() : MachineFunctionPass(ID) {}

  const MachineFunction *MF;
  const TargetInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;
  const TargetLowering *TL;
  ARMFunctionInfo *AFI;
  LivePhysRegs LiveRegs;
  RegisterClassInfo RegClassInfo;
  MachineBasicBlock::const_iterator LiveRegPos;
  bool LiveRegsValid;
  bool RegClassInfoValid;
  bool isThumb1, isThumb2;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  StringRef getPassName() const override { return ARM_LOAD_STORE_OPT_NAME; }

private:
  /// A set of load/store MachineInstrs with same base register sorted by
  /// offset.
  struct MemOpQueueEntry {
    MachineInstr *MI;
    int Offset;        ///< Load/Store offset.
    unsigned Position; ///< Position as counted from end of basic block.
    MemOpQueueEntry(MachineInstr &MI, int Offset, unsigned Position)
        : MI(&MI), Offset(Offset), Position(Position) {}
  };
  typedef SmallVector<MemOpQueueEntry,8> MemOpQueue;

  /// A set of MachineInstrs that fulfill (nearly all) conditions to get
  /// merged into a LDM/STM.
  struct MergeCandidate {
    /// List of instructions ordered by load/store offset.
    SmallVector<MachineInstr*, 4> Instrs;
    /// Index in Instrs of the instruction being latest in the schedule.
    unsigned LatestMIIdx;
    /// Index in Instrs of the instruction being earliest in the schedule.
    unsigned EarliestMIIdx;
    /// Index into the basic block where the merged instruction will be
    /// inserted. (See MemOpQueueEntry.Position)
    unsigned InsertPos;
    /// Whether the instructions can be merged into a ldm/stm instruction.
    bool CanMergeToLSMulti;
    /// Whether the instructions can be merged into a ldrd/strd instruction.
    bool CanMergeToLSDouble;
  };
  SpecificBumpPtrAllocator<MergeCandidate> Allocator;
  SmallVector<const MergeCandidate*,4> Candidates;
  SmallVector<MachineInstr*,4> MergeBaseCandidates;

  void moveLiveRegsBefore(const MachineBasicBlock &MBB,
                          MachineBasicBlock::const_iterator Before);
  unsigned findFreeReg(const TargetRegisterClass &RegClass);
  void UpdateBaseRegUses(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                         unsigned Base, unsigned WordOffset,
                         ARMCC::CondCodes Pred, unsigned PredReg);
  MachineInstr *CreateLoadStoreMulti(
      MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
      int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
      ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
      ArrayRef<std::pair<unsigned, bool>> Regs);
  MachineInstr *CreateLoadStoreDouble(
      MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
      int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
      ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
      ArrayRef<std::pair<unsigned, bool>> Regs) const;
  void FormCandidates(const MemOpQueue &MemOps);
  MachineInstr *MergeOpsUpdate(const MergeCandidate &Cand);
  bool FixInvalidRegPairOp(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator &MBBI);
  bool MergeBaseUpdateLoadStore(MachineInstr *MI);
  bool MergeBaseUpdateLSMultiple(MachineInstr *MI);
  bool MergeBaseUpdateLSDouble(MachineInstr &MI) const;
  bool LoadStoreMultipleOpti(MachineBasicBlock &MBB);
  bool MergeReturnIntoLDM(MachineBasicBlock &MBB);
  bool CombineMovBx(MachineBasicBlock &MBB);
};
char ARMLoadStoreOpt::ID = 0;
}
INITIALIZE_PASS(ARMLoadStoreOpt, "arm-ldst-opt", ARM_LOAD_STORE_OPT_NAME, false,
                false)

static bool definesCPSR(const MachineInstr &MI) {
  for (const auto &MO : MI.operands()) {
    if (!MO.isReg())
      continue;
    if (MO.isDef() && MO.getReg() == ARM::CPSR && !MO.isDead())
      // If the instruction has live CPSR def, then it's not safe to fold it
      // into load / store.
      return true;
  }
  return false;
}
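
/// Return the byte offset encoded in a load/store's immediate operand, negated
/// for subtracting addressing modes. For LDRD/STRD (addrmode3) and VLDR/VSTR
/// (addrmode5) the sign and magnitude are packed into a single field and are
/// decoded with the ARM_AM helpers below.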
static int getMemoryOpOffset(const MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
  bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD;
  unsigned NumOperands = MI.getDesc().getNumOperands();
  unsigned OffField = MI.getOperand(NumOperands - 3).getImm();

  if (Opcode == ARM::t2LDRi12 || Opcode == ARM::t2LDRi8 ||
      Opcode == ARM::t2STRi12 || Opcode == ARM::t2STRi8 ||
      Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8 ||
      Opcode == ARM::LDRi12 || Opcode == ARM::STRi12)
    return OffField;

  // Thumb1 immediate offsets are scaled by 4
  if (Opcode == ARM::tLDRi || Opcode == ARM::tSTRi ||
      Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi)
    return OffField * 4;

  int Offset = isAM3 ? ARM_AM::getAM3Offset(OffField)
                     : ARM_AM::getAM5Offset(OffField) * 4;
  ARM_AM::AddrOpc Op = isAM3 ? ARM_AM::getAM3Op(OffField)
                             : ARM_AM::getAM5Op(OffField);

  if (Op == ARM_AM::sub)
    return -Offset;
  return Offset;
}
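
/// The single load/store instructions collected by this pass keep the
/// transferred register in operand 0 and the base register in operand 1;
/// the two accessors below rely on that layout.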
static const MachineOperand &getLoadStoreBaseOp(const MachineInstr &MI) {
  return MI.getOperand(1);
}

static const MachineOperand &getLoadStoreRegOp(const MachineInstr &MI) {
  return MI.getOperand(0);
}
static int getLoadStoreMultipleOpcode(unsigned Opcode, ARM_AM::AMSubMode Mode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA;
    case ARM_AM::da: return ARM::LDMDA;
    case ARM_AM::db: return ARM::LDMDB;
    case ARM_AM::ib: return ARM::LDMIB;
    }
  case ARM::STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA;
    case ARM_AM::da: return ARM::STMDA;
    case ARM_AM::db: return ARM::STMDB;
    case ARM_AM::ib: return ARM::STMIB;
    }
  case ARM::tLDRi:
  case ARM::tLDRspi:
    // tLDMIA is writeback-only - unless the base register is in the input
    // reglist.
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tLDMIA;
    }
  case ARM::tSTRi:
  case ARM::tSTRspi:
    // There is no non-writeback tSTMIA either.
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::tSTMIA_UPD;
    }
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    ++NumLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA;
    case ARM_AM::db: return ARM::t2LDMDB;
    }
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    ++NumSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA;
    case ARM_AM::db: return ARM::t2STMDB;
    }
  case ARM::VLDRS:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA;
    case ARM_AM::db: return 0; // Only VLDMSDB_UPD exists.
    }
  case ARM::VSTRS:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA;
    case ARM_AM::db: return 0; // Only VSTMSDB_UPD exists.
    }
  case ARM::VLDRD:
    ++NumVLDMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA;
    case ARM_AM::db: return 0; // Only VLDMDDB_UPD exists.
    }
  case ARM::VSTRD:
    ++NumVSTMGened;
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA;
    case ARM_AM::db: return 0; // Only VSTMDDB_UPD exists.
    }
  }
}
static ARM_AM::AMSubMode getLoadStoreMultipleSubMode(unsigned Opcode) {
  switch (Opcode) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA_RET:
  case ARM::LDMIA:
  case ARM::LDMIA_UPD:
  case ARM::STMIA:
  case ARM::STMIA_UPD:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA_RET:
  case ARM::t2LDMIA:
  case ARM::t2LDMIA_UPD:
  case ARM::t2STMIA:
  case ARM::t2STMIA_UPD:
  case ARM::VLDMSIA:
  case ARM::VLDMSIA_UPD:
  case ARM::VSTMSIA:
  case ARM::VSTMSIA_UPD:
  case ARM::VLDMDIA:
  case ARM::VLDMDIA_UPD:
  case ARM::VSTMDIA:
  case ARM::VSTMDIA_UPD:
    return ARM_AM::ia;
  case ARM::LDMDA:
  case ARM::LDMDA_UPD:
  case ARM::STMDA:
  case ARM::STMDA_UPD:
    return ARM_AM::da;
  case ARM::LDMDB:
  case ARM::LDMDB_UPD:
  case ARM::STMDB:
  case ARM::STMDB_UPD:
  case ARM::t2LDMDB:
  case ARM::t2LDMDB_UPD:
  case ARM::t2STMDB:
  case ARM::t2STMDB_UPD:
  case ARM::VLDMSDB_UPD:
  case ARM::VSTMSDB_UPD:
  case ARM::VLDMDDB_UPD:
  case ARM::VSTMDDB_UPD:
    return ARM_AM::db;
  case ARM::LDMIB:
  case ARM::LDMIB_UPD:
  case ARM::STMIB:
  case ARM::STMIB_UPD:
    return ARM_AM::ib;
  }
}
static bool isT1i32Load(unsigned Opc) {
  return Opc == ARM::tLDRi || Opc == ARM::tLDRspi;
}

static bool isT2i32Load(unsigned Opc) {
  return Opc == ARM::t2LDRi12 || Opc == ARM::t2LDRi8;
}

static bool isi32Load(unsigned Opc) {
  return Opc == ARM::LDRi12 || isT1i32Load(Opc) || isT2i32Load(Opc);
}

static bool isT1i32Store(unsigned Opc) {
  return Opc == ARM::tSTRi || Opc == ARM::tSTRspi;
}

static bool isT2i32Store(unsigned Opc) {
  return Opc == ARM::t2STRi12 || Opc == ARM::t2STRi8;
}

static bool isi32Store(unsigned Opc) {
  return Opc == ARM::STRi12 || isT1i32Store(Opc) || isT2i32Store(Opc);
}

static bool isLoadSingle(unsigned Opc) {
  return isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
}
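
/// Return the number of immediate-offset units that make up one word for
/// \p Opc. Thumb1 immediates are scaled by the access size, so one word is
/// 1 unit for word-sized ops, 2 units for halfword ops, and 4 for byte ops.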
static unsigned getImmScale(unsigned Opc) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
    return 1;
  case ARM::tLDRHi:
  case ARM::tSTRHi:
    return 2;
  case ARM::tLDRBi:
  case ARM::tSTRBi:
    return 4;
  }
}
static unsigned getLSMultipleTransferSize(const MachineInstr *MI) {
  switch (MI->getOpcode()) {
  default: return 0;
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
  case ARM::VLDRS:
  case ARM::VSTRS:
    return 4;
  case ARM::VLDRD:
  case ARM::VSTRD:
    return 8;
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
  case ARM::tLDMIA:
  case ARM::tLDMIA_UPD:
  case ARM::tSTMIA_UPD:
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
  case ARM::t2STMIA:
  case ARM::t2STMDB:
  case ARM::VLDMSIA:
  case ARM::VSTMSIA:
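    // These are load/store multiples: the transfer size is proportional to
    // the length of the variadic register list times the access size
    // (4 bytes here, 8 bytes for the D-register variants below).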
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 4;
  case ARM::VLDMDIA:
  case ARM::VSTMDIA:
    return (MI->getNumOperands() - MI->getDesc().getNumOperands() + 1) * 8;
  }
}

/// Update future uses of the base register with the offset introduced
/// due to writeback. This function only works on Thumb1.
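/// For example (illustrative): if writeback of two words is added to a load
/// from Base, a later "ldr rX, [Base, #4]" would need offset #-4; negative
/// immediates are not encodable in Thumb1, so a SUBS resetting Base is
/// inserted instead.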
void ARMLoadStoreOpt::UpdateBaseRegUses(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI,
                                        const DebugLoc &DL, unsigned Base,
                                        unsigned WordOffset,
                                        ARMCC::CondCodes Pred,
                                        unsigned PredReg) {
  assert(isThumb1 && "Can only update base register uses for Thumb1!");
  // Start updating any instructions with immediate offsets. Insert a SUB before
  // the first non-updateable instruction (if any).
  for (; MBBI != MBB.end(); ++MBBI) {
    bool InsertSub = false;
    unsigned Opc = MBBI->getOpcode();

    if (MBBI->readsRegister(Base)) {
      int Offset;
      bool IsLoad =
          Opc == ARM::tLDRi || Opc == ARM::tLDRHi || Opc == ARM::tLDRBi;
      bool IsStore =
          Opc == ARM::tSTRi || Opc == ARM::tSTRHi || Opc == ARM::tSTRBi;

      if (IsLoad || IsStore) {
        // Loads and stores with immediate offsets can be updated, but only if
        // the new offset isn't negative.
        // The MachineOperand containing the offset immediate is the last one
        // before predicates.
        MachineOperand &MO =
            MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        // The offsets are scaled by 1, 2 or 4 depending on the Opcode.
        Offset = MO.getImm() - WordOffset * getImmScale(Opc);

        // If storing the base register, it needs to be reset first.
        unsigned InstrSrcReg = getLoadStoreRegOp(*MBBI).getReg();

        if (Offset >= 0 && !(IsStore && InstrSrcReg == Base))
          MO.setImm(Offset);
        else
          InsertSub = true;
      } else if ((Opc == ARM::tSUBi8 || Opc == ARM::tADDi8) &&
                 !definesCPSR(*MBBI)) {
        // SUBS/ADDS using this register, with a dead def of the CPSR.
        // Merge it with the update; if the merged offset is too large,
        // insert a new sub instead.
        MachineOperand &MO =
            MBBI->getOperand(MBBI->getDesc().getNumOperands() - 3);
        Offset = (Opc == ARM::tSUBi8) ?
          MO.getImm() + WordOffset * 4 :
          MO.getImm() - WordOffset * 4;

        if (Offset >= 0 && TL->isLegalAddImmediate(Offset)) {
          // FIXME: Swap ADDS<->SUBS if Offset < 0, erase instruction if
          // Offset == 0.
          MO.setImm(Offset);
          // The base register has now been reset, so exit early.
          return;
        } else {
          InsertSub = true;
        }
      } else {
        // Can't update the instruction.
        InsertSub = true;
      }
    } else if (definesCPSR(*MBBI) || MBBI->isCall() || MBBI->isBranch()) {
      // Since SUBS sets the condition flags, we can't place the base reset
      // after an instruction that has a live CPSR def.
      // The base register might also contain an argument for a function call.
      InsertSub = true;
    }

    if (InsertSub) {
      // An instruction above couldn't be updated, so insert a sub.
      AddDefaultT1CC(BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
          .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
      return;
    }

    if (MBBI->killsRegister(Base) || MBBI->definesRegister(Base))
      // Register got killed. Stop updating.
      return;
  }

  // End of block was reached.
  if (MBB.succ_size() > 0) {
    // FIXME: Because of a bug, live registers are sometimes missing from
    // the successor blocks' live-in sets. This means we can't trust that
    // information and *always* have to reset at the end of a block.
    // See PR21029.
    if (MBBI != MBB.end()) --MBBI;
    AddDefaultT1CC(
        BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBi8), Base), true)
        .addReg(Base).addImm(WordOffset * 4).addImm(Pred).addReg(PredReg);
  }
}

/// Return the first register of class \p RegClass that is not in \p Regs.
unsigned ARMLoadStoreOpt::findFreeReg(const TargetRegisterClass &RegClass) {
  if (!RegClassInfoValid) {
    RegClassInfo.runOnMachineFunction(*MF);
    RegClassInfoValid = true;
  }

  for (unsigned Reg : RegClassInfo.getOrder(&RegClass))
    if (!LiveRegs.contains(Reg))
      return Reg;
  return 0;
}

/// Compute live registers just before instruction \p Before (in normal schedule
/// direction). Computes backwards so multiple queries in the same block must
/// come in reverse order.
void ARMLoadStoreOpt::moveLiveRegsBefore(const MachineBasicBlock &MBB,
                                         MachineBasicBlock::const_iterator Before) {
  // Initialize if we never queried in this block.
  if (!LiveRegsValid) {
    LiveRegs.init(*TRI);
    LiveRegs.addLiveOuts(MBB);
    LiveRegPos = MBB.end();
    LiveRegsValid = true;
  }
  // Move backward just before the "Before" position.
  while (LiveRegPos != Before) {
    --LiveRegPos;
    LiveRegs.stepBackward(*LiveRegPos);
  }
}

static bool ContainsReg(const ArrayRef<std::pair<unsigned, bool>> &Regs,
                        unsigned Reg) {
  for (const std::pair<unsigned, bool> &R : Regs)
    if (R.first == Reg)
      return true;
  return false;
}

/// Create and insert a LDM or STM with Base as base register and registers in
/// Regs as the register operands that would be loaded / stored. Returns the
/// newly created instruction, or nullptr if the transformation is not done.
MachineInstr *ARMLoadStoreOpt::CreateLoadStoreMulti(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
    int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
    ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
    ArrayRef<std::pair<unsigned, bool>> Regs) {
  unsigned NumRegs = Regs.size();
  assert(NumRegs > 1);

  // For Thumb1 targets, it might be necessary to clobber the CPSR to merge.
  // Compute liveness information for that register to make the decision.
  bool SafeToClobberCPSR = !isThumb1 ||
      (MBB.computeRegisterLiveness(TRI, ARM::CPSR, InsertBefore, 20) ==
       MachineBasicBlock::LQR_Dead);

  bool Writeback = isThumb1; // Thumb1 LDM/STM have base reg writeback.

  // Exception: If the base register is in the input reglist, Thumb1 LDM is
  // non-writeback.
  // It's also not possible to merge an STR of the base register in Thumb1.
  if (isThumb1 && isi32Load(Opcode) && ContainsReg(Regs, Base)) {
    assert(Base != ARM::SP && "Thumb1 does not allow SP in register list");
    if (Opcode == ARM::tLDRi) {
      Writeback = false;
    } else if (Opcode == ARM::tSTRi) {
      return nullptr;
    }
  }

  ARM_AM::AMSubMode Mode = ARM_AM::ia;
  // VFP and Thumb2 do not support IB or DA modes. Thumb1 only supports IA.
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  bool haveIBAndDA = isNotVFP && !isThumb2 && !isThumb1;
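  // With N registers, IA covers [Base, Base+4*N). An initial offset of +4
  // therefore corresponds to IB, an offset of -4*N+4 to DA, and -4*N to DB.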
  if (Offset == 4 && haveIBAndDA) {
    Mode = ARM_AM::ib;
  } else if (Offset == -4 * (int)NumRegs + 4 && haveIBAndDA) {
    Mode = ARM_AM::da;
  } else if (Offset == -4 * (int)NumRegs && isNotVFP && !isThumb1) {
    // VLDM/VSTM do not support DB mode without also updating the base reg.
    Mode = ARM_AM::db;
  } else if (Offset != 0 || Opcode == ARM::tLDRspi || Opcode == ARM::tSTRspi) {
    // Check if this is a supported opcode before inserting instructions to
    // calculate a new base register.
    if (!getLoadStoreMultipleOpcode(Opcode, Mode)) return nullptr;

    // If starting offset isn't zero, insert a MI to materialize a new base.
    // But only do so if it is cost effective, i.e. merging more than two
    // loads / stores.
    if (NumRegs <= 2)
      return nullptr;

    // On Thumb1, it's not worth materializing a new base register without
    // clobbering the CPSR (i.e. not using ADDS/SUBS).
    if (!SafeToClobberCPSR)
      return nullptr;

    unsigned NewBase;
    if (isi32Load(Opcode)) {
      // If it is a load, then just use one of the destination registers
      // as the new base. Will no longer be writeback in Thumb1.
      NewBase = Regs[NumRegs-1].first;
      Writeback = false;
    } else {
      // Find a free register that we can use as scratch register.
      moveLiveRegsBefore(MBB, InsertBefore);
      // The merged instruction does not exist yet but will use several Regs if
      // it is a Store.
      if (!isLoadSingle(Opcode))
        for (const std::pair<unsigned, bool> &R : Regs)
          LiveRegs.addReg(R.first);

      NewBase = findFreeReg(isThumb1 ? ARM::tGPRRegClass : ARM::GPRRegClass);
      if (NewBase == 0)
        return nullptr;
    }

    int BaseOpc =
      isThumb2 ? ARM::t2ADDri :
      (isThumb1 && Base == ARM::SP) ? ARM::tADDrSPi :
      (isThumb1 && Offset < 8) ? ARM::tADDi3 :
      isThumb1 ? ARM::tADDi8 : ARM::ADDri;

    if (Offset < 0) {
      Offset = -Offset;
      BaseOpc =
        isThumb2 ? ARM::t2SUBri :
        (isThumb1 && Offset < 8 && Base != ARM::SP) ? ARM::tSUBi3 :
        isThumb1 ? ARM::tSUBi8 : ARM::SUBri;
    }

    if (!TL->isLegalAddImmediate(Offset))
      // FIXME: Try add with register operand?
      return nullptr; // Probably not worth it then.

    // We can only append a kill flag to the add/sub input if the value is not
    // used in the register list of the stm as well.
    bool KillOldBase = BaseKill &&
      (!isi32Store(Opcode) || !ContainsReg(Regs, Base));

    if (isThumb1) {
      // Thumb1: depending on immediate size, use either
      //   ADDS NewBase, Base, #imm3
      // or
      //   MOV  NewBase, Base
      //   ADDS NewBase, #imm8.
      if (Base != NewBase &&
          (BaseOpc == ARM::tADDi8 || BaseOpc == ARM::tSUBi8)) {
        // Need to insert a MOV to the new base first.
        if (isARMLowRegister(NewBase) && isARMLowRegister(Base) &&
            !STI->hasV6Ops()) {
          // thumbv4t doesn't have lo->lo copies, and we can't predicate tMOVSr
          if (Pred != ARMCC::AL)
            return nullptr;
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVSr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase));
        } else
          BuildMI(MBB, InsertBefore, DL, TII->get(ARM::tMOVr), NewBase)
            .addReg(Base, getKillRegState(KillOldBase))
            .addImm(Pred).addReg(PredReg);

        // The following ADDS/SUBS becomes an update.
        Base = NewBase;
        KillOldBase = true;
      }
      if (BaseOpc == ARM::tADDrSPi) {
        assert(Offset % 4 == 0 && "tADDrSPi offset is scaled by 4");
        BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset/4)
          .addImm(Pred).addReg(PredReg);
      } else
        AddDefaultT1CC(
          BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase), true)
          .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
          .addImm(Pred).addReg(PredReg);
    } else {
      BuildMI(MBB, InsertBefore, DL, TII->get(BaseOpc), NewBase)
        .addReg(Base, getKillRegState(KillOldBase)).addImm(Offset)
        .addImm(Pred).addReg(PredReg).addReg(0);
    }
    Base = NewBase;
    BaseKill = true; // New base is always killed straight away.
  }

  bool isDef = isLoadSingle(Opcode);

  // Get LS multiple opcode. Note that for Thumb1 this might be an opcode with
  // base register writeback.
  Opcode = getLoadStoreMultipleOpcode(Opcode, Mode);
  if (!Opcode)
    return nullptr;

  // Check if a Thumb1 LDM/STM merge is safe. This is the case if:
  // - There is no writeback (LDM of base register),
  // - the base register is killed by the merged instruction,
  // - or it's safe to overwrite the condition flags, i.e. to insert a SUBS
  //   to reset the base register.
  // Otherwise, don't merge.
  // It's safe to return here since the code to materialize a new base register
  // above is also conditional on SafeToClobberCPSR.
  if (isThumb1 && !SafeToClobberCPSR && Writeback && !BaseKill)
    return nullptr;

  MachineInstrBuilder MIB;

  if (Writeback) {
    assert(isThumb1 && "expected Writeback only in Thumb1");
    if (Opcode == ARM::tLDMIA) {
      assert(!ContainsReg(Regs, Base) &&
             "Thumb1 can't LDM ! with Base in Regs");
      // Update tLDMIA with writeback if necessary.
      Opcode = ARM::tLDMIA_UPD;
    }

    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));

    // Thumb1: we might need to set base writeback when building the MI.
    MIB.addReg(Base, getDefRegState(true))
       .addReg(Base, getKillRegState(BaseKill));

    // The base isn't dead after a merged instruction with writeback.
    // Insert a sub instruction after the newly formed instruction to reset.
    if (!BaseKill)
      UpdateBaseRegUses(MBB, InsertBefore, DL, Base, NumRegs, Pred, PredReg);
  } else {
    // No writeback, simply build the MachineInstr.
    MIB = BuildMI(MBB, InsertBefore, DL, TII->get(Opcode));
    MIB.addReg(Base, getKillRegState(BaseKill));
  }

  MIB.addImm(Pred).addReg(PredReg);

  for (const std::pair<unsigned, bool> &R : Regs)
    MIB.addReg(R.first, getDefRegState(isDef) | getKillRegState(R.second));

  return MIB.getInstr();
}

MachineInstr *ARMLoadStoreOpt::CreateLoadStoreDouble(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore,
    int Offset, unsigned Base, bool BaseKill, unsigned Opcode,
    ARMCC::CondCodes Pred, unsigned PredReg, const DebugLoc &DL,
    ArrayRef<std::pair<unsigned, bool>> Regs) const {
  bool IsLoad = isi32Load(Opcode);
  assert((IsLoad || isi32Store(Opcode)) && "Must have integer load or store");
  unsigned LoadStoreOpcode = IsLoad ? ARM::t2LDRDi8 : ARM::t2STRDi8;

  assert(Regs.size() == 2);
  MachineInstrBuilder MIB = BuildMI(MBB, InsertBefore, DL,
                                    TII->get(LoadStoreOpcode));
  if (IsLoad) {
    MIB.addReg(Regs[0].first, RegState::Define)
       .addReg(Regs[1].first, RegState::Define);
  } else {
    MIB.addReg(Regs[0].first, getKillRegState(Regs[0].second))
       .addReg(Regs[1].first, getKillRegState(Regs[1].second));
  }
  MIB.addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
  return MIB.getInstr();
}

/// Try to merge the instructions in \p Cand into a single LDM/STM/LDRD/STRD
/// instruction, fixing up register lists and kill/def flags on success.
MachineInstr *ARMLoadStoreOpt::MergeOpsUpdate(const MergeCandidate &Cand) {
  const MachineInstr *First = Cand.Instrs.front();
  unsigned Opcode = First->getOpcode();
  bool IsLoad = isLoadSingle(Opcode);
  SmallVector<std::pair<unsigned, bool>, 8> Regs;
  SmallVector<unsigned, 4> ImpDefs;
  DenseSet<unsigned> KilledRegs;
  DenseSet<unsigned> UsedRegs;
  // Determine list of registers and list of implicit super-register defs.
  for (const MachineInstr *MI : Cand.Instrs) {
    const MachineOperand &MO = getLoadStoreRegOp(*MI);
    unsigned Reg = MO.getReg();
    bool IsKill = MO.isKill();
    if (IsKill)
      KilledRegs.insert(Reg);
    Regs.push_back(std::make_pair(Reg, IsKill));
    UsedRegs.insert(Reg);

    if (IsLoad) {
      // Collect any implicit defs of super-registers; after merging we can't
      // be sure anymore that we properly preserved these live ranges and must
      // remove these implicit operands.
      for (const MachineOperand &MO : MI->implicit_operands()) {
        if (!MO.isReg() || !MO.isDef() || MO.isDead())
          continue;
        assert(MO.isImplicit());
        unsigned DefReg = MO.getReg();

        if (is_contained(ImpDefs, DefReg))
          continue;
        // We can ignore cases where the super-reg is read and written.
        if (MI->readsRegister(DefReg))
          continue;
        ImpDefs.push_back(DefReg);
      }
    }
  }

  // Attempt the merge.
  typedef MachineBasicBlock::iterator iterator;
  MachineInstr *LatestMI = Cand.Instrs[Cand.LatestMIIdx];
  iterator InsertBefore = std::next(iterator(LatestMI));
  MachineBasicBlock &MBB = *LatestMI->getParent();
  unsigned Offset = getMemoryOpOffset(*First);
  unsigned Base = getLoadStoreBaseOp(*First).getReg();
  bool BaseKill = LatestMI->killsRegister(Base);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(*First, PredReg);
  DebugLoc DL = First->getDebugLoc();
  MachineInstr *Merged = nullptr;
  if (Cand.CanMergeToLSDouble)
    Merged = CreateLoadStoreDouble(MBB, InsertBefore, Offset, Base, BaseKill,
                                   Opcode, Pred, PredReg, DL, Regs);
  if (!Merged && Cand.CanMergeToLSMulti)
    Merged = CreateLoadStoreMulti(MBB, InsertBefore, Offset, Base, BaseKill,
                                  Opcode, Pred, PredReg, DL, Regs);
  if (!Merged)
    return nullptr;
  // Determine earliest instruction that will get removed. We then keep an
  // iterator just above it so the following erases don't invalidate it.
  iterator EarliestI(Cand.Instrs[Cand.EarliestMIIdx]);
  bool EarliestAtBegin = false;
  if (EarliestI == MBB.begin()) {
    EarliestAtBegin = true;
  } else {
    EarliestI = std::prev(EarliestI);
  }

  // Remove instructions which have been merged.
  for (MachineInstr *MI : Cand.Instrs)
    MBB.erase(MI);

  // Determine range between the earliest removed instruction and the new one.
  if (EarliestAtBegin)
    EarliestI = MBB.begin();
  else
    EarliestI = std::next(EarliestI);
  auto FixupRange = make_range(EarliestI, iterator(Merged));

  if (isLoadSingle(Opcode)) {
    // If the previous loads defined a super-reg, then we have to mark earlier
    // operands undef and replicate the super-reg def on the merged instruction.
    for (MachineInstr &MI : FixupRange) {
      for (unsigned &ImpDefReg : ImpDefs) {
        for (MachineOperand &MO : MI.implicit_operands()) {
          if (!MO.isReg() || MO.getReg() != ImpDefReg)
            continue;
          if (MO.readsReg())
            MO.setIsUndef();
          else if (MO.isDef())
            ImpDefReg = 0;
        }
      }
    }

    MachineInstrBuilder MIB(*Merged->getParent()->getParent(), Merged);
    for (unsigned ImpDef : ImpDefs)
      MIB.addReg(ImpDef, RegState::ImplicitDefine);
  } else {
    // Remove kill flags: We are possibly storing the values later now.
    assert(isi32Store(Opcode) || Opcode == ARM::VSTRS || Opcode == ARM::VSTRD);
    for (MachineInstr &MI : FixupRange) {
      for (MachineOperand &MO : MI.uses()) {
        if (!MO.isReg() || !MO.isKill())
          continue;
        if (UsedRegs.count(MO.getReg()))
          MO.setIsKill(false);
      }
    }
    assert(ImpDefs.empty());
  }

  return Merged;
}

static bool isValidLSDoubleOffset(int Offset) {
  unsigned Value = abs(Offset);
  // t2LDRDi8/t2STRDi8 support an 8-bit immediate which is internally
  // multiplied by 4.
  return (Value % 4) == 0 && Value < 1024;
}

/// Return true for loads/stores that can be combined to a double/multi
/// operation without increasing the requirements for alignment.
static bool mayCombineMisaligned(const TargetSubtargetInfo &STI,
                                 const MachineInstr &MI) {
  // vldr/vstr trap on misaligned pointers anyway, forming vldm makes no
  // difference.
  unsigned Opcode = MI.getOpcode();
  if (!isi32Load(Opcode) && !isi32Store(Opcode))
    return true;
  // Stack pointer alignment is out of the programmer's control, so we can
  // trust SP-relative loads/stores.
  if (getLoadStoreBaseOp(MI).getReg() == ARM::SP &&
      STI.getFrameLowering()->getTransientStackAlignment() >= 4)
    return true;
  return false;
}

/// Find candidates for load/store multiple merge in list of MemOpQueueEntries.
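/// For example (illustrative), "ldr r1, [r0]; ldr r2, [r0, #4]" forms one
/// candidate that may later be rewritten as "ldm r0, {r1, r2}".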
void ARMLoadStoreOpt::FormCandidates(const MemOpQueue &MemOps) {
  const MachineInstr *FirstMI = MemOps[0].MI;
  unsigned Opcode = FirstMI->getOpcode();
  bool isNotVFP = isi32Load(Opcode) || isi32Store(Opcode);
  unsigned Size = getLSMultipleTransferSize(FirstMI);

  unsigned SIndex = 0;
  unsigned EIndex = MemOps.size();
  do {
    // Look at the first instruction.
    const MachineInstr *MI = MemOps[SIndex].MI;
    int Offset = MemOps[SIndex].Offset;
    const MachineOperand &PMO = getLoadStoreRegOp(*MI);
    unsigned PReg = PMO.getReg();
    unsigned PRegNum = PMO.isUndef() ? UINT_MAX : TRI->getEncodingValue(PReg);
    unsigned Latest = SIndex;
    unsigned Earliest = SIndex;
    unsigned Count = 1;
    bool CanMergeToLSDouble =
      STI->isThumb2() && isNotVFP && isValidLSDoubleOffset(Offset);
    // ARM errata 602117: LDRD with base in list may result in incorrect base
    // register when interrupted or faulted.
    if (STI->isCortexM3() && isi32Load(Opcode) &&
        PReg == getLoadStoreBaseOp(*MI).getReg())
      CanMergeToLSDouble = false;

    bool CanMergeToLSMulti = true;
    // On Swift, avoid a vldm/vstm that starts at an odd register number, as
    // that needs more uops than single vldrs.
    if (STI->hasSlowOddRegister() && !isNotVFP && (PRegNum % 2) == 1)
      CanMergeToLSMulti = false;
    // LDRD/STRD do not allow SP/PC, and LDM/STM either do not support them or
    // deprecate their use; an LDM into PC would be fine but cannot happen here.
    if (PReg == ARM::SP || PReg == ARM::PC)
      CanMergeToLSMulti = CanMergeToLSDouble = false;

    // Should we be conservative?
    if (AssumeMisalignedLoadStores && !mayCombineMisaligned(*STI, *MI))
      CanMergeToLSMulti = CanMergeToLSDouble = false;

    // Merge following instructions where possible.
    for (unsigned I = SIndex+1; I < EIndex; ++I, ++Count) {
      int NewOffset = MemOps[I].Offset;
      if (NewOffset != Offset + (int)Size)
        break;
      const MachineOperand &MO = getLoadStoreRegOp(*MemOps[I].MI);
      unsigned Reg = MO.getReg();
      if (Reg == ARM::SP || Reg == ARM::PC)
        break;

      // See if the current load/store may be part of a multi load/store.
      unsigned RegNum = MO.isUndef() ? UINT_MAX : TRI->getEncodingValue(Reg);
      bool PartOfLSMulti = CanMergeToLSMulti;
      if (PartOfLSMulti) {
        // Register numbers must be in ascending order.
        if (RegNum <= PRegNum)
          PartOfLSMulti = false;
        // For VFP / NEON load/store multiples, the registers must be
        // consecutive and within the limit on the number of registers per
        // instruction.
        else if (!isNotVFP && RegNum != PRegNum+1)
          PartOfLSMulti = false;
      }
      // See if the current load/store may be part of a double load/store.
      bool PartOfLSDouble = CanMergeToLSDouble && Count <= 1;

      if (!PartOfLSMulti && !PartOfLSDouble)
        break;
      CanMergeToLSMulti &= PartOfLSMulti;
      CanMergeToLSDouble &= PartOfLSDouble;
      // Track MemOp with latest and earliest position (Positions are
      // counted in reverse).
      unsigned Position = MemOps[I].Position;
      if (Position < MemOps[Latest].Position)
        Latest = I;
      else if (Position > MemOps[Earliest].Position)
        Earliest = I;
      // Prepare for next MemOp.
      Offset += Size;
      PRegNum = RegNum;
    }

    // Form a candidate from the Ops collected so far.
    MergeCandidate *Candidate = new(Allocator.Allocate()) MergeCandidate;
    for (unsigned C = SIndex, CE = SIndex + Count; C < CE; ++C)
      Candidate->Instrs.push_back(MemOps[C].MI);
    Candidate->LatestMIIdx = Latest - SIndex;
    Candidate->EarliestMIIdx = Earliest - SIndex;
    Candidate->InsertPos = MemOps[Latest].Position;
    if (Count == 1)
      CanMergeToLSMulti = CanMergeToLSDouble = false;
    Candidate->CanMergeToLSMulti = CanMergeToLSMulti;
    Candidate->CanMergeToLSDouble = CanMergeToLSDouble;
    Candidates.push_back(Candidate);
    // Continue after the chain.
    SIndex += Count;
  } while (SIndex < EIndex);
}
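
/// Map a load/store-multiple opcode to the equivalent base-updating
/// (writeback) form for the given addressing submode.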
static unsigned getUpdatingLSMultipleOpcode(unsigned Opc,
                                            ARM_AM::AMSubMode Mode) {
  switch (Opc) {
  default: llvm_unreachable("Unhandled opcode!");
  case ARM::LDMIA:
  case ARM::LDMDA:
  case ARM::LDMDB:
  case ARM::LDMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::LDMIA_UPD;
    case ARM_AM::ib: return ARM::LDMIB_UPD;
    case ARM_AM::da: return ARM::LDMDA_UPD;
    case ARM_AM::db: return ARM::LDMDB_UPD;
    }
  case ARM::STMIA:
  case ARM::STMDA:
  case ARM::STMDB:
  case ARM::STMIB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::STMIA_UPD;
    case ARM_AM::ib: return ARM::STMIB_UPD;
    case ARM_AM::da: return ARM::STMDA_UPD;
    case ARM_AM::db: return ARM::STMDB_UPD;
    }
  case ARM::t2LDMIA:
  case ARM::t2LDMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2LDMIA_UPD;
    case ARM_AM::db: return ARM::t2LDMDB_UPD;
    }
  case ARM::t2STMIA:
  case ARM::t2STMDB:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::t2STMIA_UPD;
    case ARM_AM::db: return ARM::t2STMDB_UPD;
    }
  case ARM::VLDMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMSIA_UPD;
    case ARM_AM::db: return ARM::VLDMSDB_UPD;
    }
  case ARM::VLDMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VLDMDIA_UPD;
    case ARM_AM::db: return ARM::VLDMDDB_UPD;
    }
  case ARM::VSTMSIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMSIA_UPD;
    case ARM_AM::db: return ARM::VSTMSDB_UPD;
    }
  case ARM::VSTMDIA:
    switch (Mode) {
    default: llvm_unreachable("Unhandled submode!");
    case ARM_AM::ia: return ARM::VSTMDIA_UPD;
    case ARM_AM::db: return ARM::VSTMDDB_UPD;
    }
  }
}

/// Check if the given instruction increments or decrements a register and
/// return the amount it is incremented/decremented. Returns 0 if the CPSR flags
/// generated by the instruction are possibly read as well.
static int isIncrementOrDecrement(const MachineInstr &MI, unsigned Reg,
                                  ARMCC::CondCodes Pred, unsigned PredReg) {
  bool CheckCPSRDef;
  int Scale;
  switch (MI.getOpcode()) {
  case ARM::tADDi8:  Scale =  4; CheckCPSRDef = true; break;
  case ARM::tSUBi8:  Scale = -4; CheckCPSRDef = true; break;
  case ARM::t2SUBri:
  case ARM::SUBri:   Scale = -1; CheckCPSRDef = true; break;
  case ARM::t2ADDri:
  case ARM::ADDri:   Scale =  1; CheckCPSRDef = true; break;
  case ARM::tADDspi: Scale =  4; CheckCPSRDef = false; break;
  case ARM::tSUBspi: Scale = -4; CheckCPSRDef = false; break;
  default: return 0;
  }

  unsigned MIPredReg;
  if (MI.getOperand(0).getReg() != Reg ||
      MI.getOperand(1).getReg() != Reg ||
      getInstrPredicate(MI, MIPredReg) != Pred ||
      MIPredReg != PredReg)
    return 0;

  if (CheckCPSRDef && definesCPSR(MI))
    return 0;
  return MI.getOperand(2).getImm() * Scale;
}

/// Searches for an increment or decrement of \p Reg before \p MBBI.
static MachineBasicBlock::iterator
findIncDecBefore(MachineBasicBlock::iterator MBBI, unsigned Reg,
                 ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator BeginMBBI = MBB.begin();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  if (MBBI == BeginMBBI)
    return EndMBBI;

  // Skip debug values.
  MachineBasicBlock::iterator PrevMBBI = std::prev(MBBI);
  while (PrevMBBI->isDebugValue() && PrevMBBI != BeginMBBI)
    --PrevMBBI;

  Offset = isIncrementOrDecrement(*PrevMBBI, Reg, Pred, PredReg);
  return Offset == 0 ? EndMBBI : PrevMBBI;
}

/// Searches for an increment or decrement of \p Reg after \p MBBI.
static MachineBasicBlock::iterator
findIncDecAfter(MachineBasicBlock::iterator MBBI, unsigned Reg,
                ARMCC::CondCodes Pred, unsigned PredReg, int &Offset) {
  Offset = 0;
  MachineBasicBlock &MBB = *MBBI->getParent();
  MachineBasicBlock::iterator EndMBBI = MBB.end();
  MachineBasicBlock::iterator NextMBBI = std::next(MBBI);
  // Skip debug values.
  while (NextMBBI != EndMBBI && NextMBBI->isDebugValue())
    ++NextMBBI;
  if (NextMBBI == EndMBBI)
    return EndMBBI;

  Offset = isIncrementOrDecrement(*NextMBBI, Reg, Pred, PredReg);
  return Offset == 0 ? EndMBBI : NextMBBI;
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
  1072. ///
  1073. /// stmia rn, <ra, rb, rc>
  1074. /// rn := rn + 4 * 3;
  1075. /// =>
  1076. /// stmia rn!, <ra, rb, rc>
  1077. ///
  1078. /// rn := rn - 4 * 3;
  1079. /// ldmia rn, <ra, rb, rc>
  1080. /// =>
  1081. /// ldmdb rn!, <ra, rb, rc>
  1082. bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineInstr *MI) {
  1083. // Thumb1 is already using updating loads/stores.
  1084. if (isThumb1) return false;
  1085. const MachineOperand &BaseOP = MI->getOperand(0);
  1086. unsigned Base = BaseOP.getReg();
  1087. bool BaseKill = BaseOP.isKill();
  1088. unsigned PredReg = 0;
  1089. ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  1090. unsigned Opcode = MI->getOpcode();
  1091. DebugLoc DL = MI->getDebugLoc();
  1092. // Can't use an updating ld/st if the base register is also a dest
  1093. // register. e.g. ldmdb r0!, {r0, r1, r2}. The behavior is undefined.
  1094. for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
  1095. if (MI->getOperand(i).getReg() == Base)
  1096. return false;
  1097. int Bytes = getLSMultipleTransferSize(MI);
  1098. MachineBasicBlock &MBB = *MI->getParent();
  1099. MachineBasicBlock::iterator MBBI(MI);
  1100. int Offset;
  1101. MachineBasicBlock::iterator MergeInstr
  1102. = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  1103. ARM_AM::AMSubMode Mode = getLoadStoreMultipleSubMode(Opcode);
  1104. if (Mode == ARM_AM::ia && Offset == -Bytes) {
  1105. Mode = ARM_AM::db;
  1106. } else if (Mode == ARM_AM::ib && Offset == -Bytes) {
  1107. Mode = ARM_AM::da;
  1108. } else {
  1109. MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
  1110. if (((Mode != ARM_AM::ia && Mode != ARM_AM::ib) || Offset != Bytes) &&
  1111. ((Mode != ARM_AM::da && Mode != ARM_AM::db) || Offset != -Bytes)) {
  1112. // We couldn't find an inc/dec to merge. But if the base is dead, we
  1113. // can still change to a writeback form as that will save us 2 bytes
  1114. // of code size. It can create WAW hazards though, so only do it if
  1115. // we're minimizing code size.
  1116. if (!MBB.getParent()->getFunction()->optForMinSize() || !BaseKill)
  1117. return false;
  1118. bool HighRegsUsed = false;
  1119. for (unsigned i = 2, e = MI->getNumOperands(); i != e; ++i)
  1120. if (MI->getOperand(i).getReg() >= ARM::R8) {
  1121. HighRegsUsed = true;
  1122. break;
  1123. }
  1124. if (!HighRegsUsed)
  1125. MergeInstr = MBB.end();
  1126. else
  1127. return false;
  1128. }
  1129. }
  1130. if (MergeInstr != MBB.end())
  1131. MBB.erase(MergeInstr);
  1132. unsigned NewOpc = getUpdatingLSMultipleOpcode(Opcode, Mode);
  1133. MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
  1134. .addReg(Base, getDefRegState(true)) // WB base register
  1135. .addReg(Base, getKillRegState(BaseKill))
  1136. .addImm(Pred).addReg(PredReg);
  1137. // Transfer the rest of operands.
  1138. for (unsigned OpNum = 3, e = MI->getNumOperands(); OpNum != e; ++OpNum)
  1139. MIB.addOperand(MI->getOperand(OpNum));
  1140. // Transfer memoperands.
  1141. MIB->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
  1142. MBB.erase(MBBI);
  1143. return true;
  1144. }
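
/// Return the pre-indexed (base-writeback) counterpart of a load/store
/// opcode. VLDR/VSTR have no pre-indexed forms, so the updating
/// load/store-multiple opcodes are used for them instead.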
static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc,
                                             ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_PRE_IMM;
  case ARM::STRi12:
    return ARM::STR_PRE_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_PRE;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_PRE;
  default: llvm_unreachable("Unhandled opcode!");
  }
}
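
/// Return the post-indexed (base-writeback) counterpart of a load/store
/// opcode; as above, VLDR/VSTR are mapped to the updating
/// load/store-multiple opcodes.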
static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc,
                                              ARM_AM::AddrOpc Mode) {
  switch (Opc) {
  case ARM::LDRi12:
    return ARM::LDR_POST_IMM;
  case ARM::STRi12:
    return ARM::STR_POST_IMM;
  case ARM::VLDRS:
    return Mode == ARM_AM::add ? ARM::VLDMSIA_UPD : ARM::VLDMSDB_UPD;
  case ARM::VLDRD:
    return Mode == ARM_AM::add ? ARM::VLDMDIA_UPD : ARM::VLDMDDB_UPD;
  case ARM::VSTRS:
    return Mode == ARM_AM::add ? ARM::VSTMSIA_UPD : ARM::VSTMSDB_UPD;
  case ARM::VSTRD:
    return Mode == ARM_AM::add ? ARM::VSTMDIA_UPD : ARM::VSTMDDB_UPD;
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
    return ARM::t2LDR_POST;
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    return ARM::t2STR_POST;
  default: llvm_unreachable("Unhandled opcode!");
  }
}

/// Fold preceding/trailing inc/dec of base register into the
/// LDR/STR/FLD{D|S}/FST{D|S} op when possible:
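///
/// rn := rn - 4;
/// ldr rd, [rn]
/// =>
/// ldr rd, [rn, #-4]!
///
/// ldr rd, [rn]
/// rn := rn + 4;
/// =>
/// ldr rd, [rn], #4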
bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineInstr *MI) {
  // Thumb1 doesn't have updating LDR/STR.
  // FIXME: Use LDM/STM with single register instead.
  if (isThumb1) return false;

  unsigned Base = getLoadStoreBaseOp(*MI).getReg();
  bool BaseKill = getLoadStoreBaseOp(*MI).isKill();
  unsigned Opcode = MI->getOpcode();
  DebugLoc DL = MI->getDebugLoc();
  bool isAM5 = (Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
                Opcode == ARM::VSTRD || Opcode == ARM::VSTRS);
  bool isAM2 = (Opcode == ARM::LDRi12 || Opcode == ARM::STRi12);
  if (isi32Load(Opcode) || isi32Store(Opcode))
    if (MI->getOperand(2).getImm() != 0)
      return false;
  if (isAM5 && ARM_AM::getAM5Offset(MI->getOperand(2).getImm()) != 0)
    return false;

  // Can't do the merge if the destination register is the same as the
  // would-be writeback register.
  if (MI->getOperand(0).getReg() == Base)
    return false;

  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);
  int Bytes = getLSMultipleTransferSize(MI);
  MachineBasicBlock &MBB = *MI->getParent();
  MachineBasicBlock::iterator MBBI(MI);
  int Offset;
  MachineBasicBlock::iterator MergeInstr
    = findIncDecBefore(MBBI, Base, Pred, PredReg, Offset);
  unsigned NewOpc;
  if (!isAM5 && Offset == Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
  } else if (Offset == -Bytes) {
    NewOpc = getPreIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::add);
    } else if (!isAM5 && Offset == -Bytes) {
      NewOpc = getPostIndexedLoadStoreOpcode(Opcode, ARM_AM::sub);
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  ARM_AM::AddrOpc AddSub = Offset < 0 ? ARM_AM::sub : ARM_AM::add;

  bool isLd = isLoadSingle(Opcode);
  if (isAM5) {
    // VLDM[SD]_UPD, VSTM[SD]_UPD
    // (There are no base-updating versions of VLDR/VSTR instructions, but the
    // updating load/store-multiple instructions can be used with only one
    // register.)
    MachineOperand &MO = MI->getOperand(0);
    BuildMI(MBB, MBBI, DL, TII->get(NewOpc))
      .addReg(Base, getDefRegState(true)) // WB base register
      .addReg(Base, getKillRegState(isLd ? BaseKill : false))
      .addImm(Pred).addReg(PredReg)
      .addReg(MO.getReg(), (isLd ? getDefRegState(true) :
                            getKillRegState(MO.isKill())));
  } else if (isLd) {
    if (isAM2) {
      // LDR_PRE, LDR_POST
      if (NewOpc == ARM::LDR_PRE_IMM || NewOpc == ARM::LDRB_PRE_IMM) {
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
      } else {
        int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
        BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
          .addReg(Base, RegState::Define)
          .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
      }
    } else {
      // t2LDR_PRE, t2LDR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), MI->getOperand(0).getReg())
        .addReg(Base, RegState::Define)
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  } else {
    MachineOperand &MO = MI->getOperand(0);
    // FIXME: post-indexed stores use am2offset_imm, which still encodes
    // the vestigial zero-reg offset register. When that's fixed, this clause
    // can be removed entirely.
    if (isAM2 && NewOpc == ARM::STR_POST_IMM) {
      int Imm = ARM_AM::getAM2Opc(AddSub, Bytes, ARM_AM::no_shift);
      // STR_PRE, STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addReg(0).addImm(Imm).addImm(Pred).addReg(PredReg);
    } else {
      // t2STR_PRE, t2STR_POST
      BuildMI(MBB, MBBI, DL, TII->get(NewOpc), Base)
        .addReg(MO.getReg(), getKillRegState(MO.isKill()))
        .addReg(Base).addImm(Offset).addImm(Pred).addReg(PredReg);
    }
  }
  MBB.erase(MBBI);

  return true;
}
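
/// Fold a preceding/trailing inc/dec of the base register into a t2LDRDi8 or
/// t2STRDi8, forming the pre- or post-indexed version, e.g.:
///
/// t2strd r0, r1, [rn]
/// rn := rn + 8;
/// =>
/// t2strd r0, r1, [rn], #8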
bool ARMLoadStoreOpt::MergeBaseUpdateLSDouble(MachineInstr &MI) const {
  unsigned Opcode = MI.getOpcode();
  assert((Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) &&
         "Must have t2STRDi8 or t2LDRDi8");
  if (MI.getOperand(3).getImm() != 0)
    return false;

  // Behaviour for writeback is undefined if the base register is the same as
  // one of the others.
  const MachineOperand &BaseOp = MI.getOperand(2);
  unsigned Base = BaseOp.getReg();
  const MachineOperand &Reg0Op = MI.getOperand(0);
  const MachineOperand &Reg1Op = MI.getOperand(1);
  if (Reg0Op.getReg() == Base || Reg1Op.getReg() == Base)
    return false;

  unsigned PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  MachineBasicBlock::iterator MBBI(MI);
  MachineBasicBlock &MBB = *MI.getParent();
  int Offset;
  MachineBasicBlock::iterator MergeInstr = findIncDecBefore(MBBI, Base, Pred,
                                                            PredReg, Offset);
  unsigned NewOpc;
  if (Offset == 8 || Offset == -8) {
    NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_PRE : ARM::t2STRD_PRE;
  } else {
    MergeInstr = findIncDecAfter(MBBI, Base, Pred, PredReg, Offset);
    if (Offset == 8 || Offset == -8) {
      NewOpc = Opcode == ARM::t2LDRDi8 ? ARM::t2LDRD_POST : ARM::t2STRD_POST;
    } else
      return false;
  }
  MBB.erase(MergeInstr);

  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
  if (NewOpc == ARM::t2LDRD_PRE || NewOpc == ARM::t2LDRD_POST) {
    MIB.addOperand(Reg0Op).addOperand(Reg1Op)
       .addReg(BaseOp.getReg(), RegState::Define);
  } else {
    assert(NewOpc == ARM::t2STRD_PRE || NewOpc == ARM::t2STRD_POST);
    MIB.addReg(BaseOp.getReg(), RegState::Define)
       .addOperand(Reg0Op).addOperand(Reg1Op);
  }
  MIB.addReg(BaseOp.getReg(), RegState::Kill)
     .addImm(Offset).addImm(Pred).addReg(PredReg);
  assert(TII->get(Opcode).getNumOperands() == 6 &&
         TII->get(NewOpc).getNumOperands() == 7 &&
         "Unexpected number of operands in Opcode specification.");

  // Transfer implicit operands.
  for (const MachineOperand &MO : MI.implicit_operands())
    MIB.addOperand(MO);
  MIB->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());

  MBB.erase(MBBI);
  return true;
}

/// Returns true if instruction is a memory operation that this pass is capable
/// of operating on.
static bool isMemoryOp(const MachineInstr &MI) {
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
  case ARM::VLDRS:
  case ARM::VSTRS:
  case ARM::VLDRD:
  case ARM::VSTRD:
  case ARM::LDRi12:
  case ARM::STRi12:
  case ARM::tLDRi:
  case ARM::tSTRi:
  case ARM::tLDRspi:
  case ARM::tSTRspi:
  case ARM::t2LDRi8:
  case ARM::t2LDRi12:
  case ARM::t2STRi8:
  case ARM::t2STRi12:
    break;
  default:
    return false;
  }
  if (!MI.getOperand(1).isReg())
    return false;

  // When no memory operands are present, conservatively assume unaligned,
  // volatile, unfoldable.
  if (!MI.hasOneMemOperand())
    return false;

  const MachineMemOperand &MMO = **MI.memoperands_begin();

  // Don't touch volatile memory accesses - we may be changing their order.
  if (MMO.isVolatile())
    return false;

  // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
  // not.
  if (MMO.getAlignment() < 4)
    return false;

  // str <undef> could probably be eliminated entirely, but for now we just
  // want to avoid making a mess of it.
  // FIXME: Use str <undef> as a wildcard to enable better stm folding.
  if (MI.getOperand(0).isReg() && MI.getOperand(0).isUndef())
    return false;

  // Likewise don't mess with references to undefined addresses.
  if (MI.getOperand(1).isUndef())
    return false;

  return true;
}
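
/// Helper for FixInvalidRegPairOp: emit a single load or store with the given
/// register and kill/undef flags. Used when an LDRD/STRD has to be split into
/// two single-register memory operations.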
static void InsertLDR_STR(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI, int Offset,
                          bool isDef, const DebugLoc &DL, unsigned NewOpc,
                          unsigned Reg, bool RegDeadKill, bool RegUndef,
                          unsigned BaseReg, bool BaseKill, bool BaseUndef,
                          bool OffKill, bool OffUndef, ARMCC::CondCodes Pred,
                          unsigned PredReg, const TargetInstrInfo *TII,
                          bool isT2) {
  if (isDef) {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getDefRegState(true) | getDeadRegState(RegDeadKill))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  } else {
    MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                                      TII->get(NewOpc))
      .addReg(Reg, getKillRegState(RegDeadKill) | getUndefRegState(RegUndef))
      .addReg(BaseReg, getKillRegState(BaseKill)|getUndefRegState(BaseUndef));
    MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
  }
}
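
/// Rewrite an LDRD/STRD that does not use a valid consecutive even/odd
/// register pair, or that triggers Cortex-M3 errata 602117, into either an
/// equivalent LDM/STM or a pair of single loads/stores.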
bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
                                          MachineBasicBlock::iterator &MBBI) {
  MachineInstr *MI = &*MBBI;
  unsigned Opcode = MI->getOpcode();
  if (Opcode != ARM::LDRD && Opcode != ARM::STRD && Opcode != ARM::t2LDRDi8)
    return false;

  const MachineOperand &BaseOp = MI->getOperand(2);
  unsigned BaseReg = BaseOp.getReg();
  unsigned EvenReg = MI->getOperand(0).getReg();
  unsigned OddReg = MI->getOperand(1).getReg();
  unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false);
  unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false);

  // ARM errata 602117: LDRD with base in list may result in incorrect base
  // register when interrupted or faulted.
  bool Errata602117 = EvenReg == BaseReg &&
    (Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8) && STI->isCortexM3();
  // ARM LDRD/STRD needs consecutive registers.
  bool NonConsecutiveRegs = (Opcode == ARM::LDRD || Opcode == ARM::STRD) &&
    (EvenRegNum % 2 != 0 || EvenRegNum + 1 != OddRegNum);

  if (!Errata602117 && !NonConsecutiveRegs)
    return false;

  bool isT2 = Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8;
  bool isLd = Opcode == ARM::LDRD || Opcode == ARM::t2LDRDi8;
  bool EvenDeadKill = isLd ?
    MI->getOperand(0).isDead() : MI->getOperand(0).isKill();
  bool EvenUndef = MI->getOperand(0).isUndef();
  bool OddDeadKill = isLd ?
    MI->getOperand(1).isDead() : MI->getOperand(1).isKill();
  bool OddUndef = MI->getOperand(1).isUndef();
  bool BaseKill = BaseOp.isKill();
  bool BaseUndef = BaseOp.isUndef();
  bool OffKill = isT2 ? false : MI->getOperand(3).isKill();
  bool OffUndef = isT2 ? false : MI->getOperand(3).isUndef();
  int OffImm = getMemoryOpOffset(*MI);
  unsigned PredReg = 0;
  ARMCC::CondCodes Pred = getInstrPredicate(*MI, PredReg);

  if (OddRegNum > EvenRegNum && OffImm == 0) {
    // Ascending register numbers and no offset. It's safe to change it to an
    // ldm or stm.
    unsigned NewOpc = (isLd)
      ? (isT2 ? ARM::t2LDMIA : ARM::LDMIA)
      : (isT2 ? ARM::t2STMIA : ARM::STMIA);
    if (isLd) {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg, getDefRegState(isLd) | getDeadRegState(EvenDeadKill))
        .addReg(OddReg, getDefRegState(isLd) | getDeadRegState(OddDeadKill));
      ++NumLDRD2LDM;
    } else {
      BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc))
        .addReg(BaseReg, getKillRegState(BaseKill))
        .addImm(Pred).addReg(PredReg)
        .addReg(EvenReg,
                getKillRegState(EvenDeadKill) | getUndefRegState(EvenUndef))
        .addReg(OddReg,
                getKillRegState(OddDeadKill) | getUndefRegState(OddUndef));
      ++NumSTRD2STM;
    }
  } else {
    // Split into two instructions.
    unsigned NewOpc = (isLd)
      ? (isT2 ? (OffImm < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    // Be extra careful for thumb2. t2LDRi8 can't reference a zero offset,
    // so adjust and use t2LDRi12 here for that.
    unsigned NewOpc2 = (isLd)
      ? (isT2 ? (OffImm+4 < 0 ? ARM::t2LDRi8 : ARM::t2LDRi12) : ARM::LDRi12)
      : (isT2 ? (OffImm+4 < 0 ? ARM::t2STRi8 : ARM::t2STRi12) : ARM::STRi12);
    DebugLoc dl = MBBI->getDebugLoc();
    // If this is a load and the base register is killed, it may have been
    // re-defined by the load; make sure the first load does not clobber it.
    if (isLd &&
        (BaseKill || OffKill) &&
        (TRI->regsOverlap(EvenReg, BaseReg))) {
      assert(!TRI->regsOverlap(OddReg, BaseReg));
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, false,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, false,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    } else {
      if (OddReg == EvenReg && EvenDeadKill) {
        // If the two source operands are the same, the kill marker is
        // probably on the first one. e.g.
        // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
        EvenDeadKill = false;
        OddDeadKill = true;
      }
      // Never kill the base register in the first instruction.
      if (EvenReg == BaseReg)
        EvenDeadKill = false;
      InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
                    EvenReg, EvenDeadKill, EvenUndef,
                    BaseReg, false, BaseUndef, false, OffUndef,
                    Pred, PredReg, TII, isT2);
      InsertLDR_STR(MBB, MBBI, OffImm+4, isLd, dl, NewOpc2,
                    OddReg, OddDeadKill, OddUndef,
                    BaseReg, BaseKill, BaseUndef, OffKill, OffUndef,
                    Pred, PredReg, TII, isT2);
    }
    if (isLd)
      ++NumLDRD2LDR;
    else
      ++NumSTRD2STR;
  }

  MBBI = MBB.erase(MBBI);
  return true;
}

/// An optimization pass to turn multiple LDR / STR ops of the same base and
/// incrementing offset into LDM / STM ops.
bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) {
  MemOpQueue MemOps;
  unsigned CurrBase = 0;
  unsigned CurrOpc = ~0u;
  ARMCC::CondCodes CurrPred = ARMCC::AL;
  unsigned Position = 0;
  assert(Candidates.size() == 0);
  assert(MergeBaseCandidates.size() == 0);
  LiveRegsValid = false;

  for (MachineBasicBlock::iterator I = MBB.end(), MBBI; I != MBB.begin();
       I = MBBI) {
    // The instruction in front of the iterator is the one we look at.
    MBBI = std::prev(I);
    if (FixInvalidRegPairOp(MBB, MBBI))
      continue;
    ++Position;

    if (isMemoryOp(*MBBI)) {
      unsigned Opcode = MBBI->getOpcode();
      const MachineOperand &MO = MBBI->getOperand(0);
      unsigned Reg = MO.getReg();
      unsigned Base = getLoadStoreBaseOp(*MBBI).getReg();
      unsigned PredReg = 0;
      ARMCC::CondCodes Pred = getInstrPredicate(*MBBI, PredReg);
      int Offset = getMemoryOpOffset(*MBBI);
      if (CurrBase == 0) {
        // Start of a new chain.
        CurrBase = Base;
        CurrOpc = Opcode;
        CurrPred = Pred;
        MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
        continue;
      }
      // Note: No need to match PredReg in the next if.
      if (CurrOpc == Opcode && CurrBase == Base && CurrPred == Pred) {
        // Watch out for:
        //   r4 := ldr [r0, #8]
        //   r4 := ldr [r0, #4]
        // or
        //   r0 := ldr [r0]
        // If a load overrides the base register or a register loaded by
        // another load in our chain, we cannot take this instruction.
        bool Overlap = false;
        if (isLoadSingle(Opcode)) {
          Overlap = (Base == Reg);
          if (!Overlap) {
            for (const MemOpQueueEntry &E : MemOps) {
              if (TRI->regsOverlap(Reg, E.MI->getOperand(0).getReg())) {
                Overlap = true;
                break;
              }
            }
          }
        }

        if (!Overlap) {
          // Check offset and sort memory operation into the current chain.
          if (Offset > MemOps.back().Offset) {
            MemOps.push_back(MemOpQueueEntry(*MBBI, Offset, Position));
            continue;
          } else {
            MemOpQueue::iterator MI, ME;
            for (MI = MemOps.begin(), ME = MemOps.end(); MI != ME; ++MI) {
              if (Offset < MI->Offset) {
                // Found a place to insert.
                break;
              }
              if (Offset == MI->Offset) {
                // Collision, abort.
                MI = ME;
                break;
              }
            }
            if (MI != MemOps.end()) {
              MemOps.insert(MI, MemOpQueueEntry(*MBBI, Offset, Position));
              continue;
            }
          }
        }
      }

      // Don't advance the iterator; the op will start a new chain next.
      MBBI = I;
      --Position;
      // Fallthrough to look into existing chain.
    } else if (MBBI->isDebugValue()) {
      continue;
    } else if (MBBI->getOpcode() == ARM::t2LDRDi8 ||
               MBBI->getOpcode() == ARM::t2STRDi8) {
      // ARMPreAllocLoadStoreOpt has already formed some LDRD/STRD
      // instructions; remember them because we may still be able to merge
      // add/sub into them.
      MergeBaseCandidates.push_back(&*MBBI);
    }

    // If we are here then the chain is broken; extract candidates for a
    // merge.
    if (MemOps.size() > 0) {
      FormCandidates(MemOps);
      // Reset for the next chain.
      CurrBase = 0;
      CurrOpc = ~0u;
      CurrPred = ARMCC::AL;
      MemOps.clear();
    }
  }
  if (MemOps.size() > 0)
    FormCandidates(MemOps);

  // Sort candidates so they get processed from the end to the beginning of
  // the basic block later; this is necessary for liveness calculation.
  auto LessThan = [](const MergeCandidate *M0, const MergeCandidate *M1) {
    return M0->InsertPos < M1->InsertPos;
  };
  std::sort(Candidates.begin(), Candidates.end(), LessThan);

  // Go through list of candidates and merge.
  bool Changed = false;
  for (const MergeCandidate *Candidate : Candidates) {
    if (Candidate->CanMergeToLSMulti || Candidate->CanMergeToLSDouble) {
      MachineInstr *Merged = MergeOpsUpdate(*Candidate);
      // Merge preceding/trailing base inc/dec into the merged op.
      if (Merged) {
        Changed = true;
        unsigned Opcode = Merged->getOpcode();
        if (Opcode == ARM::t2STRDi8 || Opcode == ARM::t2LDRDi8)
          MergeBaseUpdateLSDouble(*Merged);
        else
          MergeBaseUpdateLSMultiple(Merged);
      } else {
        for (MachineInstr *MI : Candidate->Instrs) {
          if (MergeBaseUpdateLoadStore(MI))
            Changed = true;
        }
      }
    } else {
      assert(Candidate->Instrs.size() == 1);
      if (MergeBaseUpdateLoadStore(Candidate->Instrs.front()))
        Changed = true;
    }
  }
  Candidates.clear();
  // Try to fold add/sub into the LDRD/STRD formed by ARMPreAllocLoadStoreOpt.
  for (MachineInstr *MI : MergeBaseCandidates)
    MergeBaseUpdateLSDouble(*MI);
  MergeBaseCandidates.clear();

  return Changed;
}

/// If this is an exit BB, try merging the return ops ("bx lr" and "mov pc,
/// lr") into the preceding stack restore so it directly restores the value of
/// LR into pc.
///
/// ldmfd sp!, {..., lr}
/// bx lr
/// or
/// ldmfd sp!, {..., lr}
/// mov pc, lr
/// =>
/// ldmfd sp!, {..., pc}
bool ARMLoadStoreOpt::MergeReturnIntoLDM(MachineBasicBlock &MBB) {
  // Thumb1 LDM doesn't allow high registers.
  if (isThumb1) return false;
  if (MBB.empty()) return false;

  MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
  if (MBBI != MBB.begin() && MBBI != MBB.end() &&
      (MBBI->getOpcode() == ARM::BX_RET ||
       MBBI->getOpcode() == ARM::tBX_RET ||
       MBBI->getOpcode() == ARM::MOVPCLR)) {
    MachineBasicBlock::iterator PrevI = std::prev(MBBI);
    // Ignore any DBG_VALUE instructions.
    while (PrevI->isDebugValue() && PrevI != MBB.begin())
      --PrevI;
    MachineInstr &PrevMI = *PrevI;
    unsigned Opcode = PrevMI.getOpcode();
    if (Opcode == ARM::LDMIA_UPD || Opcode == ARM::LDMDA_UPD ||
        Opcode == ARM::LDMDB_UPD || Opcode == ARM::LDMIB_UPD ||
        Opcode == ARM::t2LDMIA_UPD || Opcode == ARM::t2LDMDB_UPD) {
      MachineOperand &MO = PrevMI.getOperand(PrevMI.getNumOperands() - 1);
      if (MO.getReg() != ARM::LR)
        return false;
      unsigned NewOpc = (isThumb2 ? ARM::t2LDMIA_RET : ARM::LDMIA_RET);
      assert(((isThumb2 && Opcode == ARM::t2LDMIA_UPD) ||
              Opcode == ARM::LDMIA_UPD) &&
             "Unsupported multiple load-return!");
      PrevMI.setDesc(TII->get(NewOpc));
      MO.setReg(ARM::PC);
      PrevMI.copyImplicitOps(*MBB.getParent(), *MBBI);
      MBB.erase(MBBI);
      return true;
    }
  }
  return false;
}
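
/// Fold a 'mov lr, rN' that feeds a 'bx lr' return into a single 'bx rN'
/// (Thumb1 only):
///
/// tMOVr lr, r3
/// tBX_RET
/// =>
/// tBX r3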
bool ARMLoadStoreOpt::CombineMovBx(MachineBasicBlock &MBB) {
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  if (MBBI == MBB.begin() || MBBI == MBB.end() ||
      MBBI->getOpcode() != ARM::tBX_RET)
    return false;

  MachineBasicBlock::iterator Prev = MBBI;
  --Prev;
  if (Prev->getOpcode() != ARM::tMOVr || !Prev->definesRegister(ARM::LR))
    return false;

  for (auto Use : Prev->uses())
    if (Use.isKill()) {
      AddDefaultPred(BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
                             TII->get(ARM::tBX))
                     .addReg(Use.getReg(), RegState::Kill))
        .copyImplicitOps(*MBBI);
      MBB.erase(MBBI);
      MBB.erase(Prev);
      return true;
    }

  llvm_unreachable("tMOVr doesn't kill a reg before tBX_RET?");
}

bool ARMLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(*Fn.getFunction()))
    return false;

  MF = &Fn;
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TL = STI->getTargetLowering();
  AFI = Fn.getInfo<ARMFunctionInfo>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();

  RegClassInfoValid = false;
  isThumb2 = AFI->isThumb2Function();
  isThumb1 = AFI->isThumbFunction() && !isThumb2;

  bool Modified = false;
  for (MachineFunction::iterator MFI = Fn.begin(), E = Fn.end(); MFI != E;
       ++MFI) {
    MachineBasicBlock &MBB = *MFI;
    Modified |= LoadStoreMultipleOpti(MBB);
    if (STI->hasV5TOps())
      Modified |= MergeReturnIntoLDM(MBB);
    if (isThumb1)
      Modified |= CombineMovBx(MBB);
  }

  Allocator.DestroyAll();
  return Modified;
}

#define ARM_PREALLOC_LOAD_STORE_OPT_NAME \
  "ARM pre- register allocation load / store optimization pass"

namespace {
  /// Pre- register allocation pass that moves loads / stores from consecutive
  /// locations closer together to make it more likely that they will be
  /// combined later.
  struct ARMPreAllocLoadStoreOpt : public MachineFunctionPass {
    static char ID;
    ARMPreAllocLoadStoreOpt() : MachineFunctionPass(ID) {}

    const DataLayout *TD;
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    const ARMSubtarget *STI;
    MachineRegisterInfo *MRI;
    MachineFunction *MF;

    bool runOnMachineFunction(MachineFunction &Fn) override;

    StringRef getPassName() const override {
      return ARM_PREALLOC_LOAD_STORE_OPT_NAME;
    }

  private:
    bool CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, DebugLoc &dl,
                          unsigned &NewOpc, unsigned &EvenReg,
                          unsigned &OddReg, unsigned &BaseReg,
                          int &Offset,
                          unsigned &PredReg, ARMCC::CondCodes &Pred,
                          bool &isT2);
    bool RescheduleOps(MachineBasicBlock *MBB,
                       SmallVectorImpl<MachineInstr *> &Ops,
                       unsigned Base, bool isLd,
                       DenseMap<MachineInstr*, unsigned> &MI2LocMap);
    bool RescheduleLoadStoreInstrs(MachineBasicBlock *MBB);
  };
  char ARMPreAllocLoadStoreOpt::ID = 0;
}

INITIALIZE_PASS(ARMPreAllocLoadStoreOpt, "arm-prera-ldst-opt",
                ARM_PREALLOC_LOAD_STORE_OPT_NAME, false, false)

bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) {
  if (AssumeMisalignedLoadStores || skipFunction(*Fn.getFunction()))
    return false;

  TD = &Fn.getDataLayout();
  STI = &static_cast<const ARMSubtarget &>(Fn.getSubtarget());
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  MRI = &Fn.getRegInfo();
  MF = &Fn;

  bool Modified = false;
  for (MachineBasicBlock &MFI : Fn)
    Modified |= RescheduleLoadStoreInstrs(&MFI);

  return Modified;
}
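
/// Conservatively decide whether it is safe and profitable to move the memory
/// operations in MemOps next to one another: nothing between I and E may
/// conflict with them (calls, stores for a load group, loads or stores for a
/// store group, or a def of the base register), and the estimated increase in
/// register pressure must stay small.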
static bool IsSafeAndProfitableToMove(bool isLd, unsigned Base,
                                      MachineBasicBlock::iterator I,
                                      MachineBasicBlock::iterator E,
                                      SmallPtrSetImpl<MachineInstr*> &MemOps,
                                      SmallSet<unsigned, 4> &MemRegs,
                                      const TargetRegisterInfo *TRI) {
  // Are there stores / loads / calls between them?
  // FIXME: This is overly conservative. We should make use of alias
  // information some day.
  SmallSet<unsigned, 4> AddedRegPressure;
  while (++I != E) {
    if (I->isDebugValue() || MemOps.count(&*I))
      continue;
    if (I->isCall() || I->isTerminator() || I->hasUnmodeledSideEffects())
      return false;
    if (isLd && I->mayStore())
      return false;
    if (!isLd) {
      if (I->mayLoad())
        return false;
      // It's not safe to move the first 'str' down.
      // str r1, [r0]
      // strh r5, [r0]
      // str r4, [r0, #+4]
      if (I->mayStore())
        return false;
    }
    for (unsigned j = 0, NumOps = I->getNumOperands(); j != NumOps; ++j) {
      MachineOperand &MO = I->getOperand(j);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (MO.isDef() && TRI->regsOverlap(Reg, Base))
        return false;
      if (Reg != Base && !MemRegs.count(Reg))
        AddedRegPressure.insert(Reg);
    }
  }

  // Estimate register pressure increase due to the transformation.
  if (MemRegs.size() <= 4)
    // OK if we are moving a small number of instructions.
    return true;
  return AddedRegPressure.size() <= MemRegs.size() * 2;
}
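
/// Check whether Op0 and Op1 (two word loads or stores off the same base
/// register) can be combined into a single LDRD/STRD, and if so compute the
/// new opcode, the registers involved, and the encoded offset.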
bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
                                          DebugLoc &dl, unsigned &NewOpc,
                                          unsigned &FirstReg,
                                          unsigned &SecondReg,
                                          unsigned &BaseReg, int &Offset,
                                          unsigned &PredReg,
                                          ARMCC::CondCodes &Pred,
                                          bool &isT2) {
  // Make sure we're allowed to generate LDRD/STRD.
  if (!STI->hasV5TEOps())
    return false;

  // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
  unsigned Scale = 1;
  unsigned Opcode = Op0->getOpcode();
  if (Opcode == ARM::LDRi12) {
    NewOpc = ARM::LDRD;
  } else if (Opcode == ARM::STRi12) {
    NewOpc = ARM::STRD;
  } else if (Opcode == ARM::t2LDRi8 || Opcode == ARM::t2LDRi12) {
    NewOpc = ARM::t2LDRDi8;
    Scale = 4;
    isT2 = true;
  } else if (Opcode == ARM::t2STRi8 || Opcode == ARM::t2STRi12) {
    NewOpc = ARM::t2STRDi8;
    Scale = 4;
    isT2 = true;
  } else {
    return false;
  }

  // Make sure the base address satisfies i64 ld / st alignment requirement.
  // At the moment, we ignore the memoryoperand's value.
  // If we want to use AliasAnalysis, we should check it accordingly.
  if (!Op0->hasOneMemOperand() ||
      (*Op0->memoperands_begin())->isVolatile())
    return false;

  unsigned Align = (*Op0->memoperands_begin())->getAlignment();
  const Function *Func = MF->getFunction();
  unsigned ReqAlign = STI->hasV6Ops()
    ? TD->getABITypeAlignment(Type::getInt64Ty(Func->getContext()))
    : 8;  // Pre-v6 needs 8-byte alignment.
  if (Align < ReqAlign)
    return false;

  // Then make sure the immediate offset fits.
  int OffImm = getMemoryOpOffset(*Op0);
  if (isT2) {
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm <= -Limit) || (OffImm & (Scale-1)))
      return false;
    Offset = OffImm;
  } else {
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (OffImm < 0) {
      AddSub = ARM_AM::sub;
      OffImm = -OffImm;
    }
    int Limit = (1 << 8) * Scale;
    if (OffImm >= Limit || (OffImm & (Scale-1)))
      return false;
    Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
  }
  FirstReg = Op0->getOperand(0).getReg();
  SecondReg = Op1->getOperand(0).getReg();
  if (FirstReg == SecondReg)
    return false;
  BaseReg = Op0->getOperand(1).getReg();
  Pred = getInstrPredicate(*Op0, PredReg);
  dl = Op0->getDebugLoc();
  return true;
}
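
/// Given the single loads/stores in Ops, which all share the base register
/// Base, move runs of compatible, consecutive-offset operations next to each
/// other (forming an LDRD/STRD directly when exactly two word ops pair up) so
/// that the post-RA pass has a chance to merge them into LDM/STM.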
bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
                                            SmallVectorImpl<MachineInstr *> &Ops,
                                            unsigned Base, bool isLd,
                                            DenseMap<MachineInstr*, unsigned> &MI2LocMap) {
  bool RetVal = false;

  // Sort by offset (in reverse order).
  std::sort(Ops.begin(), Ops.end(),
            [](const MachineInstr *LHS, const MachineInstr *RHS) {
              int LOffset = getMemoryOpOffset(*LHS);
              int ROffset = getMemoryOpOffset(*RHS);
              assert(LHS == RHS || LOffset != ROffset);
              return LOffset > ROffset;
            });

  // The loads / stores of the same base are in order. Scan them from first to
  // last and check for the following:
  // 1. Any def of base.
  // 2. Any gaps.
  while (Ops.size() > 1) {
    unsigned FirstLoc = ~0U;
    unsigned LastLoc = 0;
    MachineInstr *FirstOp = nullptr;
    MachineInstr *LastOp = nullptr;
    int LastOffset = 0;
    unsigned LastOpcode = 0;
    unsigned LastBytes = 0;
    unsigned NumMove = 0;
    for (int i = Ops.size() - 1; i >= 0; --i) {
      MachineInstr *Op = Ops[i];
      unsigned Loc = MI2LocMap[Op];
      if (Loc <= FirstLoc) {
        FirstLoc = Loc;
        FirstOp = Op;
      }
      if (Loc >= LastLoc) {
        LastLoc = Loc;
        LastOp = Op;
      }

      unsigned LSMOpcode
        = getLoadStoreMultipleOpcode(Op->getOpcode(), ARM_AM::ia);
      if (LastOpcode && LSMOpcode != LastOpcode)
        break;

      int Offset = getMemoryOpOffset(*Op);
      unsigned Bytes = getLSMultipleTransferSize(Op);
      if (LastBytes) {
        if (Bytes != LastBytes || Offset != (LastOffset + (int)Bytes))
          break;
      }
      LastOffset = Offset;
      LastBytes = Bytes;
      LastOpcode = LSMOpcode;
      if (++NumMove == 8) // FIXME: Tune this limit.
        break;
    }

    if (NumMove <= 1)
      Ops.pop_back();
    else {
      SmallPtrSet<MachineInstr*, 4> MemOps;
      SmallSet<unsigned, 4> MemRegs;
      for (int i = NumMove-1; i >= 0; --i) {
        MemOps.insert(Ops[i]);
        MemRegs.insert(Ops[i]->getOperand(0).getReg());
      }

      // Be conservative, if the instructions are too far apart, don't
      // move them. We want to limit the increase of register pressure.
      bool DoMove = (LastLoc - FirstLoc) <= NumMove*4; // FIXME: Tune this.
      if (DoMove)
        DoMove = IsSafeAndProfitableToMove(isLd, Base, FirstOp, LastOp,
                                           MemOps, MemRegs, TRI);
      if (!DoMove) {
        for (unsigned i = 0; i != NumMove; ++i)
          Ops.pop_back();
      } else {
        // This is the new location for the loads / stores.
        MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp;
        while (InsertPos != MBB->end() &&
               (MemOps.count(&*InsertPos) || InsertPos->isDebugValue()))
          ++InsertPos;

        // If we are moving a pair of loads / stores, see if it makes sense
        // to try to allocate a pair of registers that can form register
        // pairs.
        MachineInstr *Op0 = Ops.back();
        MachineInstr *Op1 = Ops[Ops.size()-2];
        unsigned FirstReg = 0, SecondReg = 0;
        unsigned BaseReg = 0, PredReg = 0;
        ARMCC::CondCodes Pred = ARMCC::AL;
        bool isT2 = false;
        unsigned NewOpc = 0;
        int Offset = 0;
        DebugLoc dl;
        if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
                                             FirstReg, SecondReg, BaseReg,
                                             Offset, PredReg, Pred, isT2)) {
          Ops.pop_back();
          Ops.pop_back();

          const MCInstrDesc &MCID = TII->get(NewOpc);
          const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
          MRI->constrainRegClass(FirstReg, TRC);
          MRI->constrainRegClass(SecondReg, TRC);

          // Form the pair instruction.
          if (isLd) {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg, RegState::Define)
              .addReg(SecondReg, RegState::Define)
              .addReg(BaseReg);
            // FIXME: We're converting from LDRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming LDRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumLDRDFormed;
          } else {
            MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
              .addReg(FirstReg)
              .addReg(SecondReg)
              .addReg(BaseReg);
            // FIXME: We're converting from STRi12 to an insn that still
            // uses addrmode2, so we need an explicit offset reg. It should
            // always be reg0 since we're transforming STRi12s.
            if (!isT2)
              MIB.addReg(0);
            MIB.addImm(Offset).addImm(Pred).addReg(PredReg);
            MIB.setMemRefs(Op0->mergeMemRefsWith(*Op1));
            DEBUG(dbgs() << "Formed " << *MIB << "\n");
            ++NumSTRDFormed;
          }
          MBB->erase(Op0);
          MBB->erase(Op1);

          if (!isT2) {
            // Add register allocation hints to form register pairs.
            MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
            MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
          }
        } else {
          for (unsigned i = 0; i != NumMove; ++i) {
            MachineInstr *Op = Ops.back();
            Ops.pop_back();
            MBB->splice(InsertPos, MBB, Op);
          }
        }

        NumLdStMoved += NumMove;
        RetVal = true;
      }
    }
  }

  return RetVal;
}
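
/// Walk the block between scheduling barriers (calls and terminators),
/// bucketing the loads and stores by base register, then try to reschedule
/// each bucket with RescheduleOps.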
bool
ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
  bool RetVal = false;

  DenseMap<MachineInstr*, unsigned> MI2LocMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2LdsMap;
  DenseMap<unsigned, SmallVector<MachineInstr*, 4> > Base2StsMap;
  SmallVector<unsigned, 4> LdBases;
  SmallVector<unsigned, 4> StBases;

  unsigned Loc = 0;
  MachineBasicBlock::iterator MBBI = MBB->begin();
  MachineBasicBlock::iterator E = MBB->end();
  while (MBBI != E) {
    for (; MBBI != E; ++MBBI) {
      MachineInstr &MI = *MBBI;
      if (MI.isCall() || MI.isTerminator()) {
        // Stop at barriers.
        ++MBBI;
        break;
      }

      if (!MI.isDebugValue())
        MI2LocMap[&MI] = ++Loc;

      if (!isMemoryOp(MI))
        continue;
      unsigned PredReg = 0;
      if (getInstrPredicate(MI, PredReg) != ARMCC::AL)
        continue;

      int Opc = MI.getOpcode();
      bool isLd = isLoadSingle(Opc);
      unsigned Base = MI.getOperand(1).getReg();
      int Offset = getMemoryOpOffset(MI);

      bool StopHere = false;
      if (isLd) {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2LdsMap.find(Base);
        if (BI != Base2LdsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(*BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(&MI);
        } else {
          Base2LdsMap[Base].push_back(&MI);
          LdBases.push_back(Base);
        }
      } else {
        DenseMap<unsigned, SmallVector<MachineInstr*, 4> >::iterator BI =
          Base2StsMap.find(Base);
        if (BI != Base2StsMap.end()) {
          for (unsigned i = 0, e = BI->second.size(); i != e; ++i) {
            if (Offset == getMemoryOpOffset(*BI->second[i])) {
              StopHere = true;
              break;
            }
          }
          if (!StopHere)
            BI->second.push_back(&MI);
        } else {
          Base2StsMap[Base].push_back(&MI);
          StBases.push_back(Base);
        }
      }

      if (StopHere) {
        // Found a duplicate (a base+offset combination that's seen earlier).
        // Backtrack.
        --Loc;
        break;
      }
    }

    // Re-schedule loads.
    for (unsigned i = 0, e = LdBases.size(); i != e; ++i) {
      unsigned Base = LdBases[i];
      SmallVectorImpl<MachineInstr *> &Lds = Base2LdsMap[Base];
      if (Lds.size() > 1)
        RetVal |= RescheduleOps(MBB, Lds, Base, true, MI2LocMap);
    }

    // Re-schedule stores.
    for (unsigned i = 0, e = StBases.size(); i != e; ++i) {
      unsigned Base = StBases[i];
      SmallVectorImpl<MachineInstr *> &Sts = Base2StsMap[Base];
      if (Sts.size() > 1)
        RetVal |= RescheduleOps(MBB, Sts, Base, false, MI2LocMap);
    }

    if (MBBI != E) {
      Base2LdsMap.clear();
      Base2StsMap.clear();
      LdBases.clear();
      StBases.clear();
    }
  }

  return RetVal;
}

/// Returns an instance of the load / store optimization pass.
FunctionPass *llvm::createARMLoadStoreOptimizationPass(bool PreAlloc) {
  if (PreAlloc)
    return new ARMPreAllocLoadStoreOpt();
  return new ARMLoadStoreOpt();
}