WebAssemblyRegStackify.cpp 34 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898
  1. //===-- WebAssemblyRegStackify.cpp - Register Stackification --------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. ///
  10. /// \file
  11. /// This file implements a register stacking pass.
  12. ///
  13. /// This pass reorders instructions to put register uses and defs in an order
  14. /// such that they form single-use expression trees. Registers fitting this form
  15. /// are then marked as "stackified", meaning references to them are replaced by
  16. /// "push" and "pop" from the value stack.
  17. ///
  18. /// This is primarily a code size optimization, since temporary values on the
  19. /// value stack don't need to be named.
  20. ///
  21. //===----------------------------------------------------------------------===//
  22. #include "MCTargetDesc/WebAssemblyMCTargetDesc.h" // for WebAssembly::ARGUMENT_*
  23. #include "WebAssembly.h"
  24. #include "WebAssemblyMachineFunctionInfo.h"
  25. #include "WebAssemblySubtarget.h"
  26. #include "WebAssemblyUtilities.h"
  27. #include "llvm/Analysis/AliasAnalysis.h"
  28. #include "llvm/CodeGen/LiveIntervals.h"
  29. #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
  30. #include "llvm/CodeGen/MachineDominators.h"
  31. #include "llvm/CodeGen/MachineInstrBuilder.h"
  32. #include "llvm/CodeGen/MachineModuleInfoImpls.h"
  33. #include "llvm/CodeGen/MachineRegisterInfo.h"
  34. #include "llvm/CodeGen/Passes.h"
  35. #include "llvm/Support/Debug.h"
  36. #include "llvm/Support/raw_ostream.h"
  37. using namespace llvm;
  38. #define DEBUG_TYPE "wasm-reg-stackify"
  39. namespace {
  40. class WebAssemblyRegStackify final : public MachineFunctionPass {
  41. StringRef getPassName() const override {
  42. return "WebAssembly Register Stackify";
  43. }
  44. void getAnalysisUsage(AnalysisUsage &AU) const override {
  45. AU.setPreservesCFG();
  46. AU.addRequired<AAResultsWrapperPass>();
  47. AU.addRequired<MachineDominatorTree>();
  48. AU.addRequired<LiveIntervals>();
  49. AU.addPreserved<MachineBlockFrequencyInfo>();
  50. AU.addPreserved<SlotIndexes>();
  51. AU.addPreserved<LiveIntervals>();
  52. AU.addPreservedID(LiveVariablesID);
  53. AU.addPreserved<MachineDominatorTree>();
  54. MachineFunctionPass::getAnalysisUsage(AU);
  55. }
  56. bool runOnMachineFunction(MachineFunction &MF) override;
  57. public:
  58. static char ID; // Pass identification, replacement for typeid
  59. WebAssemblyRegStackify() : MachineFunctionPass(ID) {}
  60. };
  61. } // end anonymous namespace
char WebAssemblyRegStackify::ID = 0;
// Register the pass under DEBUG_TYPE ("wasm-reg-stackify") so it can be
// enabled/disabled and identified in pass-manager output.
INITIALIZE_PASS(WebAssemblyRegStackify, DEBUG_TYPE,
                "Reorder instructions to use the WebAssembly value stack",
                false, false)

/// Factory function used by the WebAssembly target to add this pass to its
/// codegen pipeline.
FunctionPass *llvm::createWebAssemblyRegStackify() {
  return new WebAssemblyRegStackify();
}
  69. // Decorate the given instruction with implicit operands that enforce the
  70. // expression stack ordering constraints for an instruction which is on
  71. // the expression stack.
  72. static void ImposeStackOrdering(MachineInstr *MI) {
  73. // Write the opaque VALUE_STACK register.
  74. if (!MI->definesRegister(WebAssembly::VALUE_STACK))
  75. MI->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK,
  76. /*isDef=*/true,
  77. /*isImp=*/true));
  78. // Also read the opaque VALUE_STACK register.
  79. if (!MI->readsRegister(WebAssembly::VALUE_STACK))
  80. MI->addOperand(MachineOperand::CreateReg(WebAssembly::VALUE_STACK,
  81. /*isDef=*/false,
  82. /*isImp=*/true));
  83. }
  84. // Convert an IMPLICIT_DEF instruction into an instruction which defines
  85. // a constant zero value.
  86. static void ConvertImplicitDefToConstZero(MachineInstr *MI,
  87. MachineRegisterInfo &MRI,
  88. const TargetInstrInfo *TII,
  89. MachineFunction &MF) {
  90. assert(MI->getOpcode() == TargetOpcode::IMPLICIT_DEF);
  91. const auto *RegClass =
  92. MRI.getRegClass(MI->getOperand(0).getReg());
  93. if (RegClass == &WebAssembly::I32RegClass) {
  94. MI->setDesc(TII->get(WebAssembly::CONST_I32));
  95. MI->addOperand(MachineOperand::CreateImm(0));
  96. } else if (RegClass == &WebAssembly::I64RegClass) {
  97. MI->setDesc(TII->get(WebAssembly::CONST_I64));
  98. MI->addOperand(MachineOperand::CreateImm(0));
  99. } else if (RegClass == &WebAssembly::F32RegClass) {
  100. MI->setDesc(TII->get(WebAssembly::CONST_F32));
  101. ConstantFP *Val = cast<ConstantFP>(Constant::getNullValue(
  102. Type::getFloatTy(MF.getFunction().getContext())));
  103. MI->addOperand(MachineOperand::CreateFPImm(Val));
  104. } else if (RegClass == &WebAssembly::F64RegClass) {
  105. MI->setDesc(TII->get(WebAssembly::CONST_F64));
  106. ConstantFP *Val = cast<ConstantFP>(Constant::getNullValue(
  107. Type::getDoubleTy(MF.getFunction().getContext())));
  108. MI->addOperand(MachineOperand::CreateFPImm(Val));
  109. } else {
  110. llvm_unreachable("Unexpected reg class");
  111. }
  112. }
// Determine whether a call to the callee referenced by
// MI->getOperand(CalleeOpNo) reads memory, writes memory, and/or has side
// effects.
//
// The out-parameters are only ever set to true here, never cleared, so the
// caller can accumulate answers across several queries.
static void QueryCallee(const MachineInstr &MI, unsigned CalleeOpNo, bool &Read,
                        bool &Write, bool &Effects, bool &StackPointer) {
  // All calls can use the stack pointer.
  StackPointer = true;

  const MachineOperand &MO = MI.getOperand(CalleeOpNo);
  if (MO.isGlobal()) {
    const Constant *GV = MO.getGlobal();
    // Look through non-interposable aliases to the aliasee, so the function
    // attributes below are read from the actual callee.
    if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
      if (!GA->isInterposable())
        GV = GA->getAliasee();

    if (const Function *F = dyn_cast<Function>(GV)) {
      // A callee that may throw has observable side effects.
      if (!F->doesNotThrow())
        Effects = true;
      // readnone: touches no memory at all — nothing more to record.
      if (F->doesNotAccessMemory())
        return;
      // readonly: reads memory but never writes it.
      if (F->onlyReadsMemory()) {
        Read = true;
        return;
      }
    }
  }

  // Assume the worst.
  Write = true;
  Read = true;
  Effects = true;
}
// Determine whether MI reads memory, writes memory, has side effects,
// and/or uses the stack pointer value.
//
// Like QueryCallee, the out-parameters are only set, never cleared.
static void Query(const MachineInstr &MI, AliasAnalysis &AA, bool &Read,
                  bool &Write, bool &Effects, bool &StackPointer) {
  assert(!MI.isPosition());
  assert(!MI.isTerminator());

  if (MI.isDebugInstr())
    return;

  // Check for loads.
  if (MI.mayLoad() && !MI.isDereferenceableInvariantLoad(&AA))
    Read = true;

  // Check for stores.
  if (MI.mayStore()) {
    Write = true;

    // Check for stores to __stack_pointer, which is modeled as an
    // external-symbol pseudo source value in the memory operands.
    for (auto MMO : MI.memoperands()) {
      const MachinePointerInfo &MPI = MMO->getPointerInfo();
      if (MPI.V.is<const PseudoSourceValue *>()) {
        auto PSV = MPI.V.get<const PseudoSourceValue *>();
        if (const ExternalSymbolPseudoSourceValue *EPSV =
                dyn_cast<ExternalSymbolPseudoSourceValue>(PSV))
          if (StringRef(EPSV->getSymbol()) == "__stack_pointer") {
            StackPointer = true;
          }
      }
    }
  } else if (MI.hasOrderedMemoryRef()) {
    switch (MI.getOpcode()) {
    case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64:
    case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64:
    case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64:
    case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64:
    case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32:
    case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64:
    case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32:
    case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64:
      // These instruction have hasUnmodeledSideEffects() returning true
      // because they trap on overflow and invalid so they can't be arbitrarily
      // moved, however hasOrderedMemoryRef() interprets this plus their lack
      // of memoperands as having a potential unknown memory reference.
      break;
    default:
      // Record volatile accesses, unless it's a call, as calls are handled
      // specially below.
      if (!MI.isCall()) {
        Write = true;
        Effects = true;
      }
      break;
    }
  }

  // Check for side effects.
  if (MI.hasUnmodeledSideEffects()) {
    switch (MI.getOpcode()) {
    case WebAssembly::DIV_S_I32: case WebAssembly::DIV_S_I64:
    case WebAssembly::REM_S_I32: case WebAssembly::REM_S_I64:
    case WebAssembly::DIV_U_I32: case WebAssembly::DIV_U_I64:
    case WebAssembly::REM_U_I32: case WebAssembly::REM_U_I64:
    case WebAssembly::I32_TRUNC_S_F32: case WebAssembly::I64_TRUNC_S_F32:
    case WebAssembly::I32_TRUNC_S_F64: case WebAssembly::I64_TRUNC_S_F64:
    case WebAssembly::I32_TRUNC_U_F32: case WebAssembly::I64_TRUNC_U_F32:
    case WebAssembly::I32_TRUNC_U_F64: case WebAssembly::I64_TRUNC_U_F64:
      // These instructions have hasUnmodeledSideEffects() returning true
      // because they trap on overflow and invalid so they can't be arbitrarily
      // moved, however in the specific case of register stackifying, it is safe
      // to move them because overflow and invalid are Undefined Behavior.
      break;
    default:
      Effects = true;
      break;
    }
  }

  // Analyze calls.
  if (MI.isCall()) {
    // The callee operand index differs between void and value-returning
    // calls: value-returning calls have the result register at operand 0.
    switch (MI.getOpcode()) {
    case WebAssembly::CALL_VOID:
    case WebAssembly::CALL_INDIRECT_VOID:
      QueryCallee(MI, 0, Read, Write, Effects, StackPointer);
      break;
    case WebAssembly::CALL_I32: case WebAssembly::CALL_I64:
    case WebAssembly::CALL_F32: case WebAssembly::CALL_F64:
    case WebAssembly::CALL_INDIRECT_I32: case WebAssembly::CALL_INDIRECT_I64:
    case WebAssembly::CALL_INDIRECT_F32: case WebAssembly::CALL_INDIRECT_F64:
      QueryCallee(MI, 1, Read, Write, Effects, StackPointer);
      break;
    default:
      llvm_unreachable("unexpected call opcode");
    }
  }
}
  232. // Test whether Def is safe and profitable to rematerialize.
  233. static bool ShouldRematerialize(const MachineInstr &Def, AliasAnalysis &AA,
  234. const WebAssemblyInstrInfo *TII) {
  235. return Def.isAsCheapAsAMove() && TII->isTriviallyReMaterializable(Def, &AA);
  236. }
  237. // Identify the definition for this register at this point. This is a
  238. // generalization of MachineRegisterInfo::getUniqueVRegDef that uses
  239. // LiveIntervals to handle complex cases.
  240. static MachineInstr *GetVRegDef(unsigned Reg, const MachineInstr *Insert,
  241. const MachineRegisterInfo &MRI,
  242. const LiveIntervals &LIS)
  243. {
  244. // Most registers are in SSA form here so we try a quick MRI query first.
  245. if (MachineInstr *Def = MRI.getUniqueVRegDef(Reg))
  246. return Def;
  247. // MRI doesn't know what the Def is. Try asking LIS.
  248. if (const VNInfo *ValNo = LIS.getInterval(Reg).getVNInfoBefore(
  249. LIS.getInstructionIndex(*Insert)))
  250. return LIS.getInstructionFromIndex(ValNo->def);
  251. return nullptr;
  252. }
// Test whether Reg, as defined at Def, has exactly one use. This is a
// generalization of MachineRegisterInfo::hasOneUse that uses LiveIntervals
// to handle complex cases.
//
// Note: MDT is accepted for signature uniformity with the other helpers but
// is not consulted here.
static bool HasOneUse(unsigned Reg, MachineInstr *Def,
                      MachineRegisterInfo &MRI, MachineDominatorTree &MDT,
                      LiveIntervals &LIS) {
  // Most registers are in SSA form here so we try a quick MRI query first.
  if (MRI.hasOneUse(Reg))
    return true;

  // Otherwise walk all uses and count only those that see the value number
  // defined at Def (the register may have multiple defs).
  bool HasOne = false;
  const LiveInterval &LI = LIS.getInterval(Reg);
  const VNInfo *DefVNI = LI.getVNInfoAt(
      LIS.getInstructionIndex(*Def).getRegSlot());
  assert(DefVNI);
  for (auto &I : MRI.use_nodbg_operands(Reg)) {
    const auto &Result = LI.Query(LIS.getInstructionIndex(*I.getParent()));
    if (Result.valueIn() == DefVNI) {
      // If the value survives past this use, some later use exists too.
      if (!Result.isKill())
        return false;
      // A second use of the same value: more than one.
      if (HasOne)
        return false;
      HasOne = true;
    }
  }
  return HasOne;
}
// Test whether it's safe to move Def to just before Insert.
// Both instructions must be in the same basic block.
// TODO: Compute memory dependencies in a way that doesn't require always
// walking the block.
// TODO: Compute memory dependencies in a way that uses AliasAnalysis to be
// more precise.
static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert,
                         AliasAnalysis &AA, const MachineRegisterInfo &MRI) {
  assert(Def->getParent() == Insert->getParent());

  // Check for register dependencies.
  SmallVector<unsigned, 4> MutableRegisters;
  for (const MachineOperand &MO : Def->operands()) {
    if (!MO.isReg() || MO.isUndef())
      continue;
    unsigned Reg = MO.getReg();

    // If the register is dead here and at Insert, ignore it.
    if (MO.isDead() && Insert->definesRegister(Reg) &&
        !Insert->readsRegister(Reg))
      continue;

    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      // Ignore ARGUMENTS; it's just used to keep the ARGUMENT_* instructions
      // from moving down, and we've already checked for that.
      if (Reg == WebAssembly::ARGUMENTS)
        continue;
      // If the physical register is never modified, ignore it.
      if (!MRI.isPhysRegModified(Reg))
        continue;
      // Otherwise, it's a physical register with unknown liveness.
      return false;
    }

    // If one of the operands isn't in SSA form, it has different values at
    // different times, and we need to make sure we don't move our use across
    // a different def.
    if (!MO.isDef() && !MRI.hasOneDef(Reg))
      MutableRegisters.push_back(Reg);
  }

  bool Read = false, Write = false, Effects = false, StackPointer = false;
  Query(*Def, AA, Read, Write, Effects, StackPointer);

  // If the instruction does not access memory and has no side effects, it has
  // no additional dependencies.
  bool HasMutableRegisters = !MutableRegisters.empty();
  if (!Read && !Write && !Effects && !StackPointer && !HasMutableRegisters)
    return true;

  // Scan through the intervening instructions between Def and Insert,
  // rejecting the move if any of them conflicts with Def.
  MachineBasicBlock::const_iterator D(Def), I(Insert);
  for (--I; I != D; --I) {
    bool InterveningRead = false;
    bool InterveningWrite = false;
    bool InterveningEffects = false;
    bool InterveningStackPointer = false;
    Query(*I, AA, InterveningRead, InterveningWrite, InterveningEffects,
          InterveningStackPointer);
    // Side-effecting instructions may not be reordered past each other.
    if (Effects && InterveningEffects)
      return false;
    // A load may not move across a store.
    if (Read && InterveningWrite)
      return false;
    // A store may not move across any other memory access.
    if (Write && (InterveningRead || InterveningWrite))
      return false;
    // Stack-pointer accesses must stay ordered relative to each other.
    if (StackPointer && InterveningStackPointer)
      return false;
    // A use of a non-SSA register may not move across a def of it.
    for (unsigned Reg : MutableRegisters)
      for (const MachineOperand &MO : I->operands())
        if (MO.isReg() && MO.isDef() && MO.getReg() == Reg)
          return false;
  }

  return true;
}
  345. /// Test whether OneUse, a use of Reg, dominates all of Reg's other uses.
  346. static bool OneUseDominatesOtherUses(unsigned Reg, const MachineOperand &OneUse,
  347. const MachineBasicBlock &MBB,
  348. const MachineRegisterInfo &MRI,
  349. const MachineDominatorTree &MDT,
  350. LiveIntervals &LIS,
  351. WebAssemblyFunctionInfo &MFI) {
  352. const LiveInterval &LI = LIS.getInterval(Reg);
  353. const MachineInstr *OneUseInst = OneUse.getParent();
  354. VNInfo *OneUseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*OneUseInst));
  355. for (const MachineOperand &Use : MRI.use_nodbg_operands(Reg)) {
  356. if (&Use == &OneUse)
  357. continue;
  358. const MachineInstr *UseInst = Use.getParent();
  359. VNInfo *UseVNI = LI.getVNInfoBefore(LIS.getInstructionIndex(*UseInst));
  360. if (UseVNI != OneUseVNI)
  361. continue;
  362. const MachineInstr *OneUseInst = OneUse.getParent();
  363. if (UseInst == OneUseInst) {
  364. // Another use in the same instruction. We need to ensure that the one
  365. // selected use happens "before" it.
  366. if (&OneUse > &Use)
  367. return false;
  368. } else {
  369. // Test that the use is dominated by the one selected use.
  370. while (!MDT.dominates(OneUseInst, UseInst)) {
  371. // Actually, dominating is over-conservative. Test that the use would
  372. // happen after the one selected use in the stack evaluation order.
  373. //
  374. // This is needed as a consequence of using implicit get_locals for
  375. // uses and implicit set_locals for defs.
  376. if (UseInst->getDesc().getNumDefs() == 0)
  377. return false;
  378. const MachineOperand &MO = UseInst->getOperand(0);
  379. if (!MO.isReg())
  380. return false;
  381. unsigned DefReg = MO.getReg();
  382. if (!TargetRegisterInfo::isVirtualRegister(DefReg) ||
  383. !MFI.isVRegStackified(DefReg))
  384. return false;
  385. assert(MRI.hasOneUse(DefReg));
  386. const MachineOperand &NewUse = *MRI.use_begin(DefReg);
  387. const MachineInstr *NewUseInst = NewUse.getParent();
  388. if (NewUseInst == OneUseInst) {
  389. if (&OneUse > &NewUse)
  390. return false;
  391. break;
  392. }
  393. UseInst = NewUseInst;
  394. }
  395. }
  396. }
  397. return true;
  398. }
  399. /// Get the appropriate tee opcode for the given register class.
  400. static unsigned GetTeeOpcode(const TargetRegisterClass *RC) {
  401. if (RC == &WebAssembly::I32RegClass)
  402. return WebAssembly::TEE_I32;
  403. if (RC == &WebAssembly::I64RegClass)
  404. return WebAssembly::TEE_I64;
  405. if (RC == &WebAssembly::F32RegClass)
  406. return WebAssembly::TEE_F32;
  407. if (RC == &WebAssembly::F64RegClass)
  408. return WebAssembly::TEE_F64;
  409. if (RC == &WebAssembly::V128RegClass)
  410. return WebAssembly::TEE_V128;
  411. llvm_unreachable("Unexpected register class");
  412. }
  413. // Shrink LI to its uses, cleaning up LI.
  414. static void ShrinkToUses(LiveInterval &LI, LiveIntervals &LIS) {
  415. if (LIS.shrinkToUses(&LI)) {
  416. SmallVector<LiveInterval*, 4> SplitLIs;
  417. LIS.splitSeparateComponents(LI, SplitLIs);
  418. }
  419. }
/// A single-use def in the same block with no intervening memory or register
/// dependencies; move the def down and nest it with the current instruction.
///
/// Returns Def, which becomes the new insertion point for the tree walk.
static MachineInstr *MoveForSingleUse(unsigned Reg, MachineOperand& Op,
                                      MachineInstr *Def,
                                      MachineBasicBlock &MBB,
                                      MachineInstr *Insert, LiveIntervals &LIS,
                                      WebAssemblyFunctionInfo &MFI,
                                      MachineRegisterInfo &MRI) {
  DEBUG(dbgs() << "Move for single use: "; Def->dump());

  // Physically move Def to just before Insert and keep LIS consistent.
  MBB.splice(Insert, &MBB, Def);
  LIS.handleMove(*Def);

  if (MRI.hasOneDef(Reg) && MRI.hasOneUse(Reg)) {
    // No one else is using this register for anything so we can just stackify
    // it in place.
    MFI.stackifyVReg(Reg);
  } else {
    // The register may have unrelated uses or defs; create a new register for
    // just our one def and use so that we can stackify it.
    unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
    Def->getOperand(0).setReg(NewReg);
    Op.setReg(NewReg);

    // Tell LiveIntervals about the new register.
    LIS.createAndComputeVirtRegInterval(NewReg);

    // Tell LiveIntervals about the changes to the old register: delete the
    // segment from Def to this use, which NewReg now covers.
    LiveInterval &LI = LIS.getInterval(Reg);
    LI.removeSegment(LIS.getInstructionIndex(*Def).getRegSlot(),
                     LIS.getInstructionIndex(*Op.getParent()).getRegSlot(),
                     /*RemoveDeadValNo=*/true);

    MFI.stackifyVReg(NewReg);

    DEBUG(dbgs() << " - Replaced register: "; Def->dump());
  }

  ImposeStackOrdering(Def);
  return Def;
}
/// A trivially cloneable instruction; clone it and nest the new copy with the
/// current instruction.
///
/// Returns the clone, which becomes the new insertion point for the tree walk.
/// If the clone satisfied the original's last remaining use, the original Def
/// is deleted.
static MachineInstr *RematerializeCheapDef(
    unsigned Reg, MachineOperand &Op, MachineInstr &Def, MachineBasicBlock &MBB,
    MachineBasicBlock::instr_iterator Insert, LiveIntervals &LIS,
    WebAssemblyFunctionInfo &MFI, MachineRegisterInfo &MRI,
    const WebAssemblyInstrInfo *TII, const WebAssemblyRegisterInfo *TRI) {
  DEBUG(dbgs() << "Rematerializing cheap def: "; Def.dump());
  DEBUG(dbgs() << " - for use in "; Op.getParent()->dump());

  // Clone Def into a fresh register just before Insert and redirect this one
  // use to the clone.
  unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(Reg));
  TII->reMaterialize(MBB, Insert, NewReg, 0, Def, *TRI);
  Op.setReg(NewReg);
  MachineInstr *Clone = &*std::prev(Insert);
  LIS.InsertMachineInstrInMaps(*Clone);
  LIS.createAndComputeVirtRegInterval(NewReg);
  MFI.stackifyVReg(NewReg);
  ImposeStackOrdering(Clone);

  DEBUG(dbgs() << " - Cloned to "; Clone->dump());

  // Shrink the interval. The original def is dead if nothing else reads it
  // after its interval is re-shrunk to the remaining uses.
  bool IsDead = MRI.use_empty(Reg);
  if (!IsDead) {
    LiveInterval &LI = LIS.getInterval(Reg);
    ShrinkToUses(LI, LIS);
    IsDead = !LI.liveAt(LIS.getInstructionIndex(Def).getDeadSlot());
  }

  // If that was the last use of the original, delete the original.
  if (IsDead) {
    DEBUG(dbgs() << " - Deleting original\n");
    SlotIndex Idx = LIS.getInstructionIndex(Def).getRegSlot();
    // Also drop any ARGUMENTS physreg def recorded at this slot before
    // removing the instruction from the index maps.
    LIS.removePhysRegDefAt(WebAssembly::ARGUMENTS, Idx);
    LIS.removeInterval(Reg);
    LIS.RemoveMachineInstrFromMaps(Def);
    Def.eraseFromParent();
  }

  return Clone;
}
/// A multiple-use def in the same block with no intervening memory or register
/// dependencies; move the def down, nest it with the current instruction, and
/// insert a tee to satisfy the rest of the uses. As an illustration, rewrite
/// this:
///
///    Reg = INST ...        // Def
///    INST ..., Reg, ...    // Insert
///    INST ..., Reg, ...
///    INST ..., Reg, ...
///
/// to this:
///
///    DefReg = INST ...     // Def (to become the new Insert)
///    TeeReg, Reg = TEE_... DefReg
///    INST ..., TeeReg, ... // Insert
///    INST ..., Reg, ...
///    INST ..., Reg, ...
///
/// with DefReg and TeeReg stackified. This eliminates a get_local from the
/// resulting code.
///
/// Returns Def, which becomes the new insertion point for the tree walk.
static MachineInstr *MoveAndTeeForMultiUse(
    unsigned Reg, MachineOperand &Op, MachineInstr *Def, MachineBasicBlock &MBB,
    MachineInstr *Insert, LiveIntervals &LIS, WebAssemblyFunctionInfo &MFI,
    MachineRegisterInfo &MRI, const WebAssemblyInstrInfo *TII) {
  DEBUG(dbgs() << "Move and tee for multi-use:"; Def->dump());

  // Move Def into place.
  MBB.splice(Insert, &MBB, Def);
  LIS.handleMove(*Def);

  // Create the Tee and attach the registers.
  const auto *RegClass = MRI.getRegClass(Reg);
  unsigned TeeReg = MRI.createVirtualRegister(RegClass);
  unsigned DefReg = MRI.createVirtualRegister(RegClass);
  MachineOperand &DefMO = Def->getOperand(0);
  MachineInstr *Tee = BuildMI(MBB, Insert, Insert->getDebugLoc(),
                              TII->get(GetTeeOpcode(RegClass)), TeeReg)
                          .addReg(Reg, RegState::Define)
                          .addReg(DefReg, getUndefRegState(DefMO.isDead()));
  Op.setReg(TeeReg);
  DefMO.setReg(DefReg);
  SlotIndex TeeIdx = LIS.InsertMachineInstrInMaps(*Tee).getRegSlot();
  SlotIndex DefIdx = LIS.getInstructionIndex(*Def).getRegSlot();

  // Tell LiveIntervals we moved the original vreg def from Def to Tee:
  // rewrite the start of the containing segment and the value number's def
  // slot in place, then re-shrink the interval to the remaining uses.
  LiveInterval &LI = LIS.getInterval(Reg);
  LiveInterval::iterator I = LI.FindSegmentContaining(DefIdx);
  VNInfo *ValNo = LI.getVNInfoAt(DefIdx);
  I->start = TeeIdx;
  ValNo->def = TeeIdx;
  ShrinkToUses(LI, LIS);

  // Finish stackifying the new regs.
  LIS.createAndComputeVirtRegInterval(TeeReg);
  LIS.createAndComputeVirtRegInterval(DefReg);
  MFI.stackifyVReg(DefReg);
  MFI.stackifyVReg(TeeReg);
  ImposeStackOrdering(Def);
  ImposeStackOrdering(Tee);

  DEBUG(dbgs() << " - Replaced register: "; Def->dump());
  DEBUG(dbgs() << " - Tee instruction: "; Tee->dump());
  return Def;
}
  549. namespace {
/// A stack for walking the tree of instructions being built, visiting the
/// MachineOperands in DFS order.
class TreeWalkerState {
  typedef MachineInstr::mop_iterator mop_iterator;
  typedef std::reverse_iterator<mop_iterator> mop_reverse_iterator;
  typedef iterator_range<mop_reverse_iterator> RangeTy;
  // Each entry holds the not-yet-visited (reversed) explicit uses of one
  // instruction; the back of the worklist is the innermost instruction.
  SmallVector<RangeTy, 4> Worklist;

public:
  explicit TreeWalkerState(MachineInstr *Insert) {
    // Operands are visited in reverse so they pop in stack-evaluation order.
    const iterator_range<mop_iterator> &Range = Insert->explicit_uses();
    if (Range.begin() != Range.end())
      Worklist.push_back(reverse(Range));
  }

  /// Test whether every operand in the tree has been visited.
  bool Done() const { return Worklist.empty(); }

  /// Return the next operand in DFS order and advance past it.
  MachineOperand &Pop() {
    RangeTy &Range = Worklist.back();
    MachineOperand &Op = *Range.begin();
    Range = drop_begin(Range, 1);
    // Exhausted ranges are dropped eagerly so Worklist.back() is never empty.
    if (Range.begin() == Range.end())
      Worklist.pop_back();
    assert((Worklist.empty() ||
            Worklist.back().begin() != Worklist.back().end()) &&
           "Empty ranges shouldn't remain in the worklist");
    return Op;
  }

  /// Push Instr's operands onto the stack to be visited.
  void PushOperands(MachineInstr *Instr) {
    const iterator_range<mop_iterator> &Range(Instr->explicit_uses());
    if (Range.begin() != Range.end())
      Worklist.push_back(reverse(Range));
  }

  /// Some of Instr's operands are on the top of the stack; remove them and
  /// re-insert them starting from the beginning (because we've commuted them).
  void ResetTopOperands(MachineInstr *Instr) {
    assert(HasRemainingOperands(Instr) &&
           "Reseting operands should only be done when the instruction has "
           "an operand still on the stack");
    Worklist.back() = reverse(Instr->explicit_uses());
  }

  /// Test whether Instr has operands remaining to be visited at the top of
  /// the stack.
  bool HasRemainingOperands(const MachineInstr *Instr) const {
    if (Worklist.empty())
      return false;
    const RangeTy &Range = Worklist.back();
    return Range.begin() != Range.end() && Range.begin()->getParent() == Instr;
  }

  /// Test whether the given register is present on the stack, indicating an
  /// operand in the tree that we haven't visited yet. Moving a definition of
  /// Reg to a point in the tree after that would change its value.
  ///
  /// This is needed as a consequence of using implicit get_locals for
  /// uses and implicit set_locals for defs.
  bool IsOnStack(unsigned Reg) const {
    for (const RangeTy &Range : Worklist)
      for (const MachineOperand &MO : Range)
        if (MO.isReg() && MO.getReg() == Reg)
          return true;
    return false;
  }
};
/// State to keep track of whether commuting is in flight or whether it's been
/// tried for the current instruction and didn't work.
class CommutingState {
  /// There are effectively three states: the initial state where we haven't
  /// started commuting anything and we don't know anything yet, the tentative
  /// state where we've commuted the operands of the current instruction and are
  /// revisiting it, and the declined state where we've reverted the operands
  /// back to their original order and will no longer commute it further.
  bool TentativelyCommuting;
  bool Declined;

  /// During the tentative state, these hold the operand indices of the commuted
  /// operands.
  unsigned Operand0, Operand1;

public:
  CommutingState() : TentativelyCommuting(false), Declined(false) {}

  /// Stackification for an operand was not successful due to ordering
  /// constraints. If possible, and if we haven't already tried it and declined
  /// it, commute Insert's operands and prepare to revisit it.
  void MaybeCommute(MachineInstr *Insert, TreeWalkerState &TreeWalker,
                    const WebAssemblyInstrInfo *TII) {
    if (TentativelyCommuting) {
      assert(!Declined &&
             "Don't decline commuting until you've finished trying it");
      // Commuting didn't help. Revert it.
      TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
      TentativelyCommuting = false;
      Declined = true;
    } else if (!Declined && TreeWalker.HasRemainingOperands(Insert)) {
      // Let findCommutedOpIndices choose which operands to swap.
      Operand0 = TargetInstrInfo::CommuteAnyOperandIndex;
      Operand1 = TargetInstrInfo::CommuteAnyOperandIndex;
      if (TII->findCommutedOpIndices(*Insert, Operand0, Operand1)) {
        // Tentatively commute the operands and try again.
        TII->commuteInstruction(*Insert, /*NewMI=*/false, Operand0, Operand1);
        TreeWalker.ResetTopOperands(Insert);
        TentativelyCommuting = true;
        Declined = false;
      }
    }
  }

  /// Stackification for some operand was successful. Reset to the default
  /// state.
  void Reset() {
    TentativelyCommuting = false;
    Declined = false;
  }
};
  657. } // end anonymous namespace
/// Run register stackification over MF: walk each basic block bottom-up and,
/// for each explicit virtual-register use, try to fold the defining
/// instruction into the use — by moving it (single use), rematerializing it
/// (cheap defs), or moving it plus inserting a tee (one dominating use) — so
/// the value is carried on the wasm value stack. Returns true if anything
/// changed.
bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) {
  DEBUG(dbgs() << "********** Register Stackifying **********\n"
                  "********** Function: "
               << MF.getName() << '\n');

  bool Changed = false;
  MachineRegisterInfo &MRI = MF.getRegInfo();
  WebAssemblyFunctionInfo &MFI = *MF.getInfo<WebAssemblyFunctionInfo>();
  const auto *TII = MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
  const auto *TRI = MF.getSubtarget<WebAssemblySubtarget>().getRegisterInfo();
  AliasAnalysis &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
  LiveIntervals &LIS = getAnalysis<LiveIntervals>();

  // Disable the TEE optimization if we aren't doing direct wasm object
  // emission, because lowering TEE to TEE_LOCAL is done in the ExplicitLocals
  // pass, which is also disabled.
  bool UseTee = true;
  if (MF.getSubtarget<WebAssemblySubtarget>()
          .getTargetTriple().isOSBinFormatELF())
    UseTee = false;

  // Walk the instructions from the bottom up. Currently we don't look past
  // block boundaries, and the blocks aren't ordered so the block visitation
  // order isn't significant, but we may want to change this in the future.
  for (MachineBasicBlock &MBB : MF) {
    // Don't use a range-based for loop, because we modify the list as we're
    // iterating over it and the end iterator may change.
    for (auto MII = MBB.rbegin(); MII != MBB.rend(); ++MII) {
      MachineInstr *Insert = &*MII;
      // Don't nest anything inside an inline asm, because we don't have
      // constraints for $push inputs.
      if (Insert->getOpcode() == TargetOpcode::INLINEASM)
        continue;

      // Ignore debugging intrinsics.
      // NOTE(review): only DBG_VALUE is skipped here, while the LIFO verifier
      // below uses the broader MI.isDebugInstr(); confirm no other debug
      // opcodes can reach this point.
      if (Insert->getOpcode() == TargetOpcode::DBG_VALUE)
        continue;

      // Iterate through the inputs in reverse order, since we'll be pulling
      // operands off the stack in LIFO order.
      CommutingState Commuting;
      TreeWalkerState TreeWalker(Insert);
      while (!TreeWalker.Done()) {
        MachineOperand &Op = TreeWalker.Pop();

        // We're only interested in explicit virtual register operands.
        if (!Op.isReg())
          continue;

        unsigned Reg = Op.getReg();
        assert(Op.isUse() && "explicit_uses() should only iterate over uses");
        assert(!Op.isImplicit() &&
               "explicit_uses() should only iterate over explicit operands");
        if (TargetRegisterInfo::isPhysicalRegister(Reg))
          continue;

        // Identify the definition for this register at this point.
        MachineInstr *Def = GetVRegDef(Reg, Insert, MRI, LIS);
        if (!Def)
          continue;

        // Don't nest an INLINE_ASM def into anything, because we don't have
        // constraints for $pop outputs.
        if (Def->getOpcode() == TargetOpcode::INLINEASM)
          continue;

        // Argument instructions represent live-in registers and not real
        // instructions.
        if (WebAssembly::isArgument(*Def))
          continue;

        // Decide which strategy to take. Prefer to move a single-use value
        // over cloning it, and prefer cloning over introducing a tee.
        // For moving, we require the def to be in the same block as the use;
        // this makes things simpler (LiveIntervals' handleMove function only
        // supports intra-block moves) and it's MachineSink's job to catch all
        // the sinking opportunities anyway.
        bool SameBlock = Def->getParent() == &MBB;
        bool CanMove = SameBlock && IsSafeToMove(Def, Insert, AA, MRI) &&
                       !TreeWalker.IsOnStack(Reg);
        if (CanMove && HasOneUse(Reg, Def, MRI, MDT, LIS)) {
          Insert = MoveForSingleUse(Reg, Op, Def, MBB, Insert, LIS, MFI, MRI);
        } else if (ShouldRematerialize(*Def, AA, TII)) {
          Insert =
              RematerializeCheapDef(Reg, Op, *Def, MBB, Insert->getIterator(),
                                    LIS, MFI, MRI, TII, TRI);
        } else if (UseTee && CanMove &&
                   OneUseDominatesOtherUses(Reg, Op, MBB, MRI, MDT, LIS, MFI)) {
          Insert = MoveAndTeeForMultiUse(Reg, Op, Def, MBB, Insert, LIS, MFI,
                                         MRI, TII);
        } else {
          // We failed to stackify the operand. If the problem was ordering
          // constraints, Commuting may be able to help.
          if (!CanMove && SameBlock)
            Commuting.MaybeCommute(Insert, TreeWalker, TII);
          // Proceed to the next operand.
          continue;
        }

        // If the instruction we just stackified is an IMPLICIT_DEF, convert it
        // to a constant 0 so that the def is explicit, and the push/pop
        // correspondence is maintained.
        if (Insert->getOpcode() == TargetOpcode::IMPLICIT_DEF)
          ConvertImplicitDefToConstZero(Insert, MRI, TII, MF);

        // We stackified an operand. Add the defining instruction's operands to
        // the worklist stack now to continue to build an ever deeper tree.
        Commuting.Reset();
        TreeWalker.PushOperands(Insert);
      }

      // If we stackified any operands, skip over the tree to start looking for
      // the next instruction we can build a tree on.
      if (Insert != &*MII) {
        ImposeStackOrdering(&*MII);
        // Reposition MII at Insert (the deepest instruction we nested) so the
        // ++MII in the loop header resumes above the whole stackified tree.
        MII = MachineBasicBlock::iterator(Insert).getReverse();
        Changed = true;
      }
    }
  }

  // If we used VALUE_STACK anywhere, add it to the live-in sets everywhere so
  // that it never looks like a use-before-def.
  if (Changed) {
    MF.getRegInfo().addLiveIn(WebAssembly::VALUE_STACK);
    for (MachineBasicBlock &MBB : MF)
      MBB.addLiveIn(WebAssembly::VALUE_STACK);
  }

#ifndef NDEBUG
  // Verify that pushes and pops are performed in LIFO order: defs of
  // stackified vregs push, uses pop, and each pop must match the most recent
  // unmatched push.
  SmallVector<unsigned, 0> Stack;
  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : MBB) {
      if (MI.isDebugInstr())
        continue;
      for (MachineOperand &MO : reverse(MI.explicit_operands())) {
        if (!MO.isReg())
          continue;
        unsigned Reg = MO.getReg();

        if (MFI.isVRegStackified(Reg)) {
          if (MO.isDef())
            Stack.push_back(Reg);
          else
            assert(Stack.pop_back_val() == Reg &&
                   "Register stack pop should be paired with a push");
        }
      }
    }
    // TODO: Generalize this code to support keeping values on the stack across
    // basic block boundaries.
    assert(Stack.empty() &&
           "Register stack pushes and pops should be balanced");
  }
#endif

  return Changed;
}