//===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass turns all control flow pseudo instructions into native ones,
/// computing their address on the fly; it also sets the STACK_SIZE info.
//
//===----------------------------------------------------------------------===//
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/Function.h"
#include "llvm/Pass.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <set>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "r600cf"

namespace {
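
// Reading note (summarized from the code below, not from hardware
// documentation): CFStack models how many hardware control-flow stack slots
// the current nesting of branches and loops requires, so the pass can record
// the maximum depth in R600MachineFunctionInfo. Full entries and fractional
// "sub-entries" are tracked separately; see updateMaxStackSize() for how the
// two are combined.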
struct CFStack {
  enum StackItem {
    ENTRY = 0,
    SUB_ENTRY = 1,
    FIRST_NON_WQM_PUSH = 2,
    FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
  };

  const R600Subtarget *ST;
  std::vector<StackItem> BranchStack;
  std::vector<StackItem> LoopStack;
  unsigned MaxStackSize;
  unsigned CurrentEntries = 0;
  unsigned CurrentSubEntries = 0;

  CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
      // We need to reserve a stack entry for CALL_FS in vertex shaders.
      MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {}

  unsigned getLoopDepth();
  bool branchStackContains(CFStack::StackItem);
  bool requiresWorkAroundForInst(unsigned Opcode);
  unsigned getSubEntrySize(CFStack::StackItem Item);
  void updateMaxStackSize();
  void pushBranch(unsigned Opcode, bool isWQM = false);
  void pushLoop();
  void popBranch();
  void popLoop();
};

unsigned CFStack::getLoopDepth() {
  return LoopStack.size();
}

bool CFStack::branchStackContains(CFStack::StackItem Item) {
  return llvm::is_contained(BranchStack, Item);
}

bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
  if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
      getLoopDepth() > 1)
    return true;

  if (!ST->hasCFAluBug())
    return false;

  switch (Opcode) {
  default: return false;
  case R600::CF_ALU_PUSH_BEFORE:
  case R600::CF_ALU_ELSE_AFTER:
  case R600::CF_ALU_BREAK:
  case R600::CF_ALU_CONTINUE:
    if (CurrentSubEntries == 0)
      return false;

    if (ST->getWavefrontSize() == 64) {
      // We are being conservative here. We only require this work-around if
      // CurrentSubEntries > 3 &&
      // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
      //
      // We have to be conservative, because we don't know for certain that
      // our stack allocation algorithm for Evergreen/NI is correct. Applying
      // this work-around when CurrentSubEntries > 3 allows us to over-allocate
      // stack resources without any problems.
      return CurrentSubEntries > 3;
    } else {
      assert(ST->getWavefrontSize() == 32);
      // We are being conservative here. We only require the work-around if
      // CurrentSubEntries > 7 &&
      // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
      // See the comment on the wavefront size == 64 case for why we are
      // being conservative.
      return CurrentSubEntries > 7;
    }
  }
}

unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
  switch (Item) {
  default:
    return 0;
  case CFStack::FIRST_NON_WQM_PUSH:
    assert(!ST->hasCaymanISA());
    if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
      // +1 For the push operation.
      // +2 Extra space required.
      return 3;
    } else {
      // Some documentation says that this is not necessary on Evergreen,
      // but experimentation has shown that we need to allocate 1 extra
      // sub-entry for the first non-WQM push.
      // +1 For the push operation.
      // +1 Extra space required.
      return 2;
    }
  case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
    assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
    // +1 For the push operation.
    // +1 Extra space required.
    return 2;
  case CFStack::SUB_ENTRY:
    return 1;
  }
}
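
// Reading note (inferred from the arithmetic below, not from hardware docs):
// four sub-entries are assumed to pack into one full stack entry, so the
// current stack size is the number of full entries plus the sub-entry count
// rounded up to a multiple of four and divided by four.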
void CFStack::updateMaxStackSize() {
  unsigned CurrentStackSize =
      CurrentEntries + (alignTo(CurrentSubEntries, 4) / 4);
  MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
}

void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
  CFStack::StackItem Item = CFStack::ENTRY;
  switch (Opcode) {
  case R600::CF_PUSH_EG:
  case R600::CF_ALU_PUSH_BEFORE:
    if (!isWQM) {
      if (!ST->hasCaymanISA() &&
          !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
        Item = CFStack::FIRST_NON_WQM_PUSH;  // May not be required on Evergreen/NI
                                             // See comment in
                                             // CFStack::getSubEntrySize()
      else if (CurrentEntries > 0 &&
               ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
               !ST->hasCaymanISA() &&
               !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
        Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
      else
        Item = CFStack::SUB_ENTRY;
    } else
      Item = CFStack::ENTRY;
    break;
  }
  BranchStack.push_back(Item);
  if (Item == CFStack::ENTRY)
    CurrentEntries++;
  else
    CurrentSubEntries += getSubEntrySize(Item);
  updateMaxStackSize();
}

void CFStack::pushLoop() {
  LoopStack.push_back(CFStack::ENTRY);
  CurrentEntries++;
  updateMaxStackSize();
}

void CFStack::popBranch() {
  CFStack::StackItem Top = BranchStack.back();
  if (Top == CFStack::ENTRY)
    CurrentEntries--;
  else
    CurrentSubEntries -= getSubEntrySize(Top);
  BranchStack.pop_back();
}

void CFStack::popLoop() {
  CurrentEntries--;
  LoopStack.pop_back();
}
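
// A brief sketch of what the pass below does (summarized from the code, so
// treat it as a reading aid rather than a specification): for every basic
// block it gathers texture/vertex fetch instructions into fetch clauses and
// ALU instructions into ALU clauses, rewrites the control-flow pseudo
// instructions (IF_PREDICATE_SET, ELSE, ENDIF, WHILELOOP, ENDLOOP, BREAK,
// CONTINUE, RETURN) into native CF instructions while counting emitted CF
// instructions in CfCount so that branch and loop addresses can be patched
// once they are known, and finally records the maximum control-flow stack
// depth in R600MachineFunctionInfo::CFStackSize.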
class R600ControlFlowFinalizer : public MachineFunctionPass {
private:
  using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>;

  enum ControlFlowInstruction {
    CF_TC,
    CF_VC,
    CF_CALL_FS,
    CF_WHILE_LOOP,
    CF_END_LOOP,
    CF_LOOP_BREAK,
    CF_LOOP_CONTINUE,
    CF_JUMP,
    CF_ELSE,
    CF_POP,
    CF_END
  };

  const R600InstrInfo *TII = nullptr;
  const R600RegisterInfo *TRI = nullptr;
  unsigned MaxFetchInst;
  const R600Subtarget *ST = nullptr;

  bool IsTrivialInst(MachineInstr &MI) const {
    switch (MI.getOpcode()) {
    case R600::KILL:
    case R600::RETURN:
      return true;
    default:
      return false;
    }
  }

  const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
    unsigned Opcode = 0;
    bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
    switch (CFI) {
    case CF_TC:
      Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;
      break;
    case CF_VC:
      Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;
      break;
    case CF_CALL_FS:
      Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600;
      break;
    case CF_WHILE_LOOP:
      Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600;
      break;
    case CF_END_LOOP:
      Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600;
      break;
    case CF_LOOP_BREAK:
      Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600;
      break;
    case CF_LOOP_CONTINUE:
      Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;
      break;
    case CF_JUMP:
      Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600;
      break;
    case CF_ELSE:
      Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600;
      break;
    case CF_POP:
      Opcode = isEg ? R600::POP_EG : R600::POP_R600;
      break;
    case CF_END:
      if (ST->hasCaymanISA()) {
        Opcode = R600::CF_END_CM;
        break;
      }
      Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600;
      break;
    }
    assert(Opcode && "No opcode selected");
    return TII->get(Opcode);
  }
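
  // Reading note (inferred from the loop below), presumably to avoid
  // read-after-write dependencies inside a fetch clause: DstRegs accumulates
  // the 128-bit destination super-registers already written within the clause
  // being built, and a candidate instruction is rejected if its source
  // super-register is already in that set.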
  bool isCompatibleWithClause(const MachineInstr &MI,
                              std::set<unsigned> &DstRegs) const {
    unsigned DstMI, SrcMI;
    for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
                                          E = MI.operands_end();
         I != E; ++I) {
      const MachineOperand &MO = *I;
      if (!MO.isReg())
        continue;
      if (MO.isDef()) {
        unsigned Reg = MO.getReg();
        if (R600::R600_Reg128RegClass.contains(Reg))
          DstMI = Reg;
        else
          DstMI = TRI->getMatchingSuperReg(Reg,
              AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
              &R600::R600_Reg128RegClass);
      }
      if (MO.isUse()) {
        unsigned Reg = MO.getReg();
        if (R600::R600_Reg128RegClass.contains(Reg))
          SrcMI = Reg;
        else
          SrcMI = TRI->getMatchingSuperReg(Reg,
              AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
              &R600::R600_Reg128RegClass);
      }
    }
    if ((DstRegs.find(SrcMI) == DstRegs.end())) {
      DstRegs.insert(DstMI);
      return true;
    } else
      return false;
  }

  ClauseFile
  MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
      const {
    MachineBasicBlock::iterator ClauseHead = I;
    std::vector<MachineInstr *> ClauseContent;
    unsigned AluInstCount = 0;
    bool IsTex = TII->usesTextureCache(*ClauseHead);
    std::set<unsigned> DstRegs;
    for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
      if (IsTrivialInst(*I))
        continue;
      if (AluInstCount >= MaxFetchInst)
        break;
      if ((IsTex && !TII->usesTextureCache(*I)) ||
          (!IsTex && !TII->usesVertexCache(*I)))
        break;
      if (!isCompatibleWithClause(*I, DstRegs))
        break;
      AluInstCount++;
      ClauseContent.push_back(&*I);
    }
    MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
                                getHWInstrDesc(IsTex ? CF_TC : CF_VC))
                            .addImm(0)                 // ADDR
                            .addImm(AluInstCount - 1); // COUNT
    return ClauseFile(MIb, std::move(ClauseContent));
  }
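
  // Reading note (from the code below): getLiteral() walks the ALU sources
  // that read ALU_LITERAL_X, reuses one of the four per-group literal
  // channels (X/Y/Z/W) when the same immediate already appears in Lits, and
  // otherwise assigns the next free channel and records the literal operand
  // so that LITERALS pseudo instructions can be emitted after the group.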
  void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
    static const unsigned LiteralRegs[] = {
      R600::ALU_LITERAL_X,
      R600::ALU_LITERAL_Y,
      R600::ALU_LITERAL_Z,
      R600::ALU_LITERAL_W
    };
    const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
        TII->getSrcs(MI);
    for (const auto &Src : Srcs) {
      if (Src.first->getReg() != R600::ALU_LITERAL_X)
        continue;
      int64_t Imm = Src.second;
      std::vector<MachineOperand *>::iterator It =
          llvm::find_if(Lits, [&](MachineOperand *val) {
            return val->isImm() && (val->getImm() == Imm);
          });

      // Get corresponding Operand
      MachineOperand &Operand = MI.getOperand(
          TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal));

      if (It != Lits.end()) {
        // Reuse existing literal reg
        unsigned Index = It - Lits.begin();
        Src.first->setReg(LiteralRegs[Index]);
      } else {
        // Allocate new literal reg
        assert(Lits.size() < 4 && "Too many literals in Instruction Group");
        Src.first->setReg(LiteralRegs[Lits.size()]);
        Lits.push_back(&Operand);
      }
    }
  }

  MachineBasicBlock::iterator insertLiterals(
      MachineBasicBlock::iterator InsertPos,
      const std::vector<unsigned> &Literals) const {
    MachineBasicBlock *MBB = InsertPos->getParent();
    for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
      unsigned LiteralPair0 = Literals[i];
      unsigned LiteralPair1 = (i + 1 < e) ? Literals[i + 1] : 0;
      InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
                          TII->get(R600::LITERALS))
          .addImm(LiteralPair0)
          .addImm(LiteralPair1);
    }
    return InsertPos;
  }
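
  // Reading note (from the code below): MakeALUClause() collects the ALU
  // instructions following a clause head, flattens any bundles, assigns
  // literal channels via getLiteral(), materializes the literals as LITERALS
  // pseudo instructions (packed two per instruction), and writes the clause
  // size minus one into operand 7 of the clause head, which presumably
  // encodes the instruction count.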
  ClauseFile
  MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
      const {
    MachineInstr &ClauseHead = *I;
    std::vector<MachineInstr *> ClauseContent;
    I++;
    for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
      if (IsTrivialInst(*I)) {
        ++I;
        continue;
      }
      if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
        break;
      std::vector<MachineOperand *> Literals;
      if (I->isBundle()) {
        MachineInstr &DeleteMI = *I;
        MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
        while (++BI != E && BI->isBundledWithPred()) {
          BI->unbundleFromPred();
          for (MachineOperand &MO : BI->operands()) {
            if (MO.isReg() && MO.isInternalRead())
              MO.setIsInternalRead(false);
          }
          getLiteral(*BI, Literals);
          ClauseContent.push_back(&*BI);
        }
        I = BI;
        DeleteMI.eraseFromParent();
      } else {
        getLiteral(*I, Literals);
        ClauseContent.push_back(&*I);
        I++;
      }
      for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
        MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(),
                                            TII->get(R600::LITERALS));
        if (Literals[i]->isImm()) {
          MILit.addImm(Literals[i]->getImm());
        } else {
          MILit.addGlobalAddress(Literals[i]->getGlobal(),
                                 Literals[i]->getOffset());
        }
        if (i + 1 < e) {
          if (Literals[i + 1]->isImm()) {
            MILit.addImm(Literals[i + 1]->getImm());
          } else {
            MILit.addGlobalAddress(Literals[i + 1]->getGlobal(),
                                   Literals[i + 1]->getOffset());
          }
        } else
          MILit.addImm(0);
        ClauseContent.push_back(MILit);
      }
    }
    assert(ClauseContent.size() < 128 && "ALU clause is too big");
    ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1);
    return ClauseFile(&ClauseHead, std::move(ClauseContent));
  }
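
  // Reading note: EmitFetchClause()/EmitALUClause() splice the clause body to
  // its final position and patch the clause head with the current CfCount.
  // The fetch case advances CfCount by two per instruction, which appears to
  // reflect fetch instructions occupying twice the encoding size of a CF
  // instruction (an inference from the code, not from hardware docs).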
  void EmitFetchClause(MachineBasicBlock::iterator InsertPos,
                       const DebugLoc &DL, ClauseFile &Clause,
                       unsigned &CfCount) {
    CounterPropagateAddr(*Clause.first, CfCount);
    MachineBasicBlock *BB = Clause.first->getParent();
    BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);
    for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
      BB->splice(InsertPos, BB, Clause.second[i]);
    }
    CfCount += 2 * Clause.second.size();
  }

  void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL,
                     ClauseFile &Clause, unsigned &CfCount) {
    Clause.first->getOperand(0).setImm(0);
    CounterPropagateAddr(*Clause.first, CfCount);
    MachineBasicBlock *BB = Clause.first->getParent();
    BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);
    for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
      BB->splice(InsertPos, BB, Clause.second[i]);
    }
    CfCount += Clause.second.size();
  }
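
  // Reading note: operand 0 of the native CF instructions built in this pass
  // is the ADDR field; CounterPropagateAddr() adds the now-known address to
  // whatever placeholder value was stored there when the instruction was
  // created.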
  void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const {
    MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm());
  }

  void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
                            unsigned Addr) const {
    for (MachineInstr *MI : MIs) {
      CounterPropagateAddr(*MI, Addr);
    }
  }

public:
  static char ID;

  R600ControlFlowFinalizer() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    ST = &MF.getSubtarget<R600Subtarget>();
    MaxFetchInst = ST->getTexVTXClauseSize();
    TII = ST->getInstrInfo();
    TRI = ST->getRegisterInfo();

    R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

    CFStack CFStack(ST, MF.getFunction().getCallingConv());
    for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
         ++MB) {
      MachineBasicBlock &MBB = *MB;
      unsigned CfCount = 0;
      std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
      std::vector<MachineInstr *> IfThenElseStack;
      if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) {
        BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
                getHWInstrDesc(CF_CALL_FS));
        CfCount++;
      }
      std::vector<ClauseFile> FetchClauses, AluClauses;
      std::vector<MachineInstr *> LastAlu(1);
      std::vector<MachineInstr *> ToPopAfter;

      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
           I != E;) {
        if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
          LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump(););
          FetchClauses.push_back(MakeFetchClause(MBB, I));
          CfCount++;
          LastAlu.back() = nullptr;
          continue;
        }

        MachineBasicBlock::iterator MI = I;
        if (MI->getOpcode() != R600::ENDIF)
          LastAlu.back() = nullptr;
        if (MI->getOpcode() == R600::CF_ALU)
          LastAlu.back() = &*MI;
        I++;
        bool RequiresWorkAround =
            CFStack.requiresWorkAroundForInst(MI->getOpcode());
        switch (MI->getOpcode()) {
        case R600::CF_ALU_PUSH_BEFORE:
          if (RequiresWorkAround) {
            LLVM_DEBUG(dbgs()
                       << "Applying bug work-around for ALU_PUSH_BEFORE\n");
            BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
                .addImm(CfCount + 1)
                .addImm(1);
            MI->setDesc(TII->get(R600::CF_ALU));
            CfCount++;
            CFStack.pushBranch(R600::CF_PUSH_EG);
          } else
            CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
          LLVM_FALLTHROUGH;
        case R600::CF_ALU:
          I = MI;
          AluClauses.push_back(MakeALUClause(MBB, I));
          LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
          CfCount++;
          break;
        case R600::WHILELOOP: {
          CFStack.pushLoop();
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                      getHWInstrDesc(CF_WHILE_LOOP))
                                  .addImm(1);
          std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
              std::set<MachineInstr *>());
          Pair.second.insert(MIb);
          LoopStack.push_back(std::move(Pair));
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::ENDLOOP: {
          CFStack.popLoop();
          std::pair<unsigned, std::set<MachineInstr *>> Pair =
              std::move(LoopStack.back());
          LoopStack.pop_back();
          CounterPropagateAddr(Pair.second, CfCount);
          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
              .addImm(Pair.first + 1);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::IF_PREDICATE_SET: {
          LastAlu.push_back(nullptr);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                      getHWInstrDesc(CF_JUMP))
                                  .addImm(0)
                                  .addImm(0);
          IfThenElseStack.push_back(MIb);
          LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::ELSE: {
          MachineInstr *JumpInst = IfThenElseStack.back();
          IfThenElseStack.pop_back();
          CounterPropagateAddr(*JumpInst, CfCount);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                      getHWInstrDesc(CF_ELSE))
                                  .addImm(0)
                                  .addImm(0);
          LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
          IfThenElseStack.push_back(MIb);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::ENDIF: {
          CFStack.popBranch();
          if (LastAlu.back()) {
            ToPopAfter.push_back(LastAlu.back());
          } else {
            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                        getHWInstrDesc(CF_POP))
                                    .addImm(CfCount + 1)
                                    .addImm(1);
            (void)MIb;
            LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
            CfCount++;
          }

          MachineInstr *IfOrElseInst = IfThenElseStack.back();
          IfThenElseStack.pop_back();
          CounterPropagateAddr(*IfOrElseInst, CfCount);
          IfOrElseInst->getOperand(1).setImm(1);
          LastAlu.pop_back();
          MI->eraseFromParent();
          break;
        }
        case R600::BREAK: {
          CfCount++;
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                      getHWInstrDesc(CF_LOOP_BREAK))
                                  .addImm(0);
          LoopStack.back().second.insert(MIb);
          MI->eraseFromParent();
          break;
        }
        case R600::CONTINUE: {
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                      getHWInstrDesc(CF_LOOP_CONTINUE))
                                  .addImm(0);
          LoopStack.back().second.insert(MIb);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::RETURN: {
          DebugLoc DL = MBB.findDebugLoc(MI);
          BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
          CfCount++;
          if (CfCount % 2) {
            BuildMI(MBB, I, DL, TII->get(R600::PAD));
            CfCount++;
          }
          MI->eraseFromParent();
          for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
            EmitFetchClause(I, DL, FetchClauses[i], CfCount);
          for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
            EmitALUClause(I, DL, AluClauses[i], CfCount);
          break;
        }
        default:
          if (TII->isExport(MI->getOpcode())) {
            LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
            CfCount++;
          }
          break;
        }
      }
      for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
        MachineInstr *Alu = ToPopAfter[i];
        BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
                TII->get(R600::CF_ALU_POP_AFTER))
            .addImm(Alu->getOperand(0).getImm())
            .addImm(Alu->getOperand(1).getImm())
            .addImm(Alu->getOperand(2).getImm())
            .addImm(Alu->getOperand(3).getImm())
            .addImm(Alu->getOperand(4).getImm())
            .addImm(Alu->getOperand(5).getImm())
            .addImm(Alu->getOperand(6).getImm())
            .addImm(Alu->getOperand(7).getImm())
            .addImm(Alu->getOperand(8).getImm());
        Alu->eraseFromParent();
      }
      MFI->CFStackSize = CFStack.MaxStackSize;
    }

    return false;
  }

  StringRef getPassName() const override {
    return "R600 Control Flow Finalizer Pass";
  }
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE,
                      "R600 Control Flow Finalizer", false, false)
INITIALIZE_PASS_END(R600ControlFlowFinalizer, DEBUG_TYPE,
                    "R600 Control Flow Finalizer", false, false)

char R600ControlFlowFinalizer::ID = 0;

char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID;

FunctionPass *llvm::createR600ControlFlowFinalizer() {
  return new R600ControlFlowFinalizer();
}