// PPCCTRLoops.cpp
  1. //===-- PPCCTRLoops.cpp - Identify and generate CTR loops -----------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This pass identifies loops where we can generate the PPC branch instructions
  11. // that decrement and test the count register (CTR) (bdnz and friends).
  12. //
  13. // The pattern that defines the induction variable can changed depending on
  14. // prior optimizations. For example, the IndVarSimplify phase run by 'opt'
  15. // normalizes induction variables, and the Loop Strength Reduction pass
  16. // run by 'llc' may also make changes to the induction variable.
  17. //
  18. // Criteria for CTR loops:
  19. // - Countable loops (w/ ind. var for a trip count)
  20. // - Try inner-most loops first
  21. // - No nested CTR loops.
  22. // - No function calls in loops.
  23. //
  24. //===----------------------------------------------------------------------===//
  25. #include "llvm/Transforms/Scalar.h"
  26. #include "PPC.h"
  27. #include "PPCTargetMachine.h"
  28. #include "llvm/ADT/STLExtras.h"
  29. #include "llvm/ADT/Statistic.h"
  30. #include "llvm/Analysis/LoopInfo.h"
  31. #include "llvm/Analysis/ScalarEvolutionExpander.h"
  32. #include "llvm/IR/Constants.h"
  33. #include "llvm/IR/DerivedTypes.h"
  34. #include "llvm/IR/Dominators.h"
  35. #include "llvm/IR/InlineAsm.h"
  36. #include "llvm/IR/Instructions.h"
  37. #include "llvm/IR/IntrinsicInst.h"
  38. #include "llvm/IR/Module.h"
  39. #include "llvm/IR/ValueHandle.h"
  40. #include "llvm/PassSupport.h"
  41. #include "llvm/Support/CommandLine.h"
  42. #include "llvm/Support/Debug.h"
  43. #include "llvm/Support/raw_ostream.h"
  44. #include "llvm/Target/TargetLibraryInfo.h"
  45. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  46. #include "llvm/Transforms/Utils/Local.h"
  47. #include "llvm/Transforms/Utils/LoopUtils.h"
  48. #ifndef NDEBUG
  49. #include "llvm/CodeGen/MachineDominators.h"
  50. #include "llvm/CodeGen/MachineFunction.h"
  51. #include "llvm/CodeGen/MachineFunctionPass.h"
  52. #include "llvm/CodeGen/MachineRegisterInfo.h"
  53. #endif
  54. #include <algorithm>
  55. #include <vector>
using namespace llvm;

#define DEBUG_TYPE "ctrloops"

#ifndef NDEBUG
// Debug-only knob: stop converting loops after this many attempts
// (-1 disables the limit). Useful for bisecting miscompiles.
static cl::opt<int> CTRLoopLimit("ppc-max-ctrloop", cl::Hidden, cl::init(-1));
#endif

STATISTIC(NumCTRLoops, "Number of loops converted to CTR loops");

namespace llvm {
// Forward declarations of the initializers generated by the
// INITIALIZE_PASS machinery further down in this file.
void initializePPCCTRLoopsPass(PassRegistry&);
#ifndef NDEBUG
void initializePPCCTRLoopsVerifyPass(PassRegistry&);
#endif
}
  68. namespace {
// IR-level function pass that rewrites eligible countable loops to use the
// PPC counter register: the trip count is loaded via the ppc_mtctr intrinsic
// and the loop-exit branch tests ppc_is_decremented_ctr_nonzero, which later
// lower to mtctr/bdnz-style instructions.
struct PPCCTRLoops : public FunctionPass {
#ifndef NDEBUG
  // Debug-only counter of conversion attempts, capped by -ppc-max-ctrloop.
  static int Counter;
#endif

public:
  static char ID;

  // Default constructor (e.g. when created by 'opt'); TM is null, so
  // target-lowering queries are skipped conservatively.
  PPCCTRLoops() : FunctionPass(ID), TM(nullptr) {
    initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
  }
  // Constructor used by the PPC codegen pipeline with a real target machine.
  PPCCTRLoops(PPCTargetMachine &TM) : FunctionPass(ID), TM(&TM) {
    initializePPCCTRLoopsPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LoopInfo>();
    AU.addPreserved<LoopInfo>();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
    AU.addRequired<ScalarEvolution>();
  }

private:
  // True if BB contains anything that may be lowered to code that uses or
  // clobbers the CTR register (calls, libcall expansions, indirect jumps).
  bool mightUseCTR(const Triple &TT, BasicBlock *BB);
  // Try to convert L (recursing into nested loops first); returns true if
  // any loop was changed.
  bool convertToCTRLoop(Loop *L);

private:
  PPCTargetMachine *TM;             // May be null outside codegen.
  LoopInfo *LI;
  ScalarEvolution *SE;
  const DataLayout *DL;             // May be null if no DataLayoutPass.
  DominatorTree *DT;
  const TargetLibraryInfo *LibInfo; // May be null; enables libcall checks.
};
// Pass identification; the address of ID is the unique pass identifier.
char PPCCTRLoops::ID = 0;
#ifndef NDEBUG
int PPCCTRLoops::Counter = 0;
#endif
#ifndef NDEBUG
// Debug-only machine-function pass that verifies every decrement-and-branch
// instruction (bdnz/bdz) is preceded on all paths by an MTCTR with no
// intervening clobber of the CTR register.
struct PPCCTRLoopsVerify : public MachineFunctionPass {
public:
  static char ID;

  PPCCTRLoopsVerify() : MachineFunctionPass(ID) {
    initializePPCCTRLoopsVerifyPass(*PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTree>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

private:
  MachineDominatorTree *MDT;
};

char PPCCTRLoopsVerify::ID = 0;
#endif // NDEBUG
  121. } // end anonymous namespace
// Register the pass (flag "ppc-ctr-loops") and its analysis dependencies
// with the global pass registry.
INITIALIZE_PASS_BEGIN(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfo)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution)
INITIALIZE_PASS_END(PPCCTRLoops, "ppc-ctr-loops", "PowerPC CTR Loops",
                    false, false)
// Factory used by the PPC target to add this pass to its codegen pipeline.
FunctionPass *llvm::createPPCCTRLoops(PPCTargetMachine &TM) {
  return new PPCCTRLoops(TM);
}
#ifndef NDEBUG
// Registration and factory for the debug-only CTR-loop verifier pass.
INITIALIZE_PASS_BEGIN(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
                      "PowerPC CTR Loops Verify", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(PPCCTRLoopsVerify, "ppc-ctr-loops-verify",
                    "PowerPC CTR Loops Verify", false, false)

FunctionPass *llvm::createPPCCTRLoopsVerify() {
  return new PPCCTRLoopsVerify();
}
#endif // NDEBUG
  142. bool PPCCTRLoops::runOnFunction(Function &F) {
  143. LI = &getAnalysis<LoopInfo>();
  144. SE = &getAnalysis<ScalarEvolution>();
  145. DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  146. DataLayoutPass *DLP = getAnalysisIfAvailable<DataLayoutPass>();
  147. DL = DLP ? &DLP->getDataLayout() : nullptr;
  148. LibInfo = getAnalysisIfAvailable<TargetLibraryInfo>();
  149. bool MadeChange = false;
  150. for (LoopInfo::iterator I = LI->begin(), E = LI->end();
  151. I != E; ++I) {
  152. Loop *L = *I;
  153. if (!L->getParentLoop())
  154. MadeChange |= convertToCTRLoop(L);
  155. }
  156. return MadeChange;
  157. }
  158. static bool isLargeIntegerTy(bool Is32Bit, Type *Ty) {
  159. if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
  160. return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
  161. return false;
  162. }
// Scan BB for any instruction that may be lowered to code using the CTR
// register -- function calls, operations that expand to runtime-library
// calls, and indirect jumps. Returning true disqualifies the enclosing loop
// from CTR conversion.
bool PPCCTRLoops::mightUseCTR(const Triple &TT, BasicBlock *BB) {
  for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
       J != JE; ++J) {
    if (CallInst *CI = dyn_cast<CallInst>(J)) {
      if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledValue())) {
        // Inline ASM is okay, unless it clobbers the ctr register.
        InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
        for (unsigned i = 0, ie = CIV.size(); i < ie; ++i) {
          InlineAsm::ConstraintInfo &C = CIV[i];
          // Any output or clobber constraint naming {ctr} disqualifies.
          if (C.Type != InlineAsm::isInput)
            for (unsigned j = 0, je = C.Codes.size(); j < je; ++j)
              if (StringRef(C.Codes[j]).equals_lower("{ctr}"))
                return true;
        }

        continue;
      }

      // Without a target machine we cannot reason about how a call lowers;
      // conservatively reject.
      if (!TM)
        return true;
      const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();

      if (Function *F = CI->getCalledFunction()) {
        // Most intrinsics don't become function calls, but some might.
        // sin, cos, exp and log are always calls.
        unsigned Opcode;
        if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
          switch (F->getIntrinsicID()) {
          default: continue;

// VisualStudio defines setjmp as _setjmp
#if defined(_MSC_VER) && defined(setjmp) && \
    !defined(setjmp_undefined_for_msvc)
# pragma push_macro("setjmp")
# undef setjmp
# define setjmp_undefined_for_msvc
#endif

          case Intrinsic::setjmp:

#if defined(_MSC_VER) && defined(setjmp_undefined_for_msvc)
// let's return it to _setjmp state
# pragma pop_macro("setjmp")
# undef setjmp_undefined_for_msvc
#endif

          case Intrinsic::longjmp:

          // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
          // because, although it does clobber the counter register, the
          // control can't then return to inside the loop unless there is also
          // an eh_sjlj_setjmp.
          case Intrinsic::eh_sjlj_setjmp:

          // These intrinsics always lower to real calls on PPC.
          case Intrinsic::memcpy:
          case Intrinsic::memmove:
          case Intrinsic::memset:
          case Intrinsic::powi:
          case Intrinsic::log:
          case Intrinsic::log2:
          case Intrinsic::log10:
          case Intrinsic::exp:
          case Intrinsic::exp2:
          case Intrinsic::pow:
          case Intrinsic::sin:
          case Intrinsic::cos:
            return true;
          case Intrinsic::copysign:
            if (CI->getArgOperand(0)->getType()->getScalarType()->
                isPPC_FP128Ty())
              return true;
            else
              continue; // ISD::FCOPYSIGN is never a library call.
          // The remaining FP intrinsics become calls only when the matching
          // ISD operation is not legal/custom; that is checked below.
          case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
          case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
          case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
          case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
          case Intrinsic::rint: Opcode = ISD::FRINT; break;
          case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
          case Intrinsic::round: Opcode = ISD::FROUND; break;
          }
        }

        // PowerPC does not use [US]DIVREM or other library calls for
        // operations on regular types which are not otherwise library calls
        // (i.e. soft float or atomics). If adapting for targets that do,
        // additional care is required here.

        LibFunc::Func Func;
        if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
            LibInfo->getLibFunc(F->getName(), Func) &&
            LibInfo->hasOptimizedCodeGen(Func)) {
          // Non-read-only functions are never treated as intrinsics.
          if (!CI->onlyReadsMemory())
            return true;

          // Conversion happens only for FP calls.
          if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
            return true;

          switch (Func) {
          default: return true;
          case LibFunc::copysign:
          case LibFunc::copysignf:
            continue; // ISD::FCOPYSIGN is never a library call.
          case LibFunc::copysignl:
            return true;
          case LibFunc::fabs:
          case LibFunc::fabsf:
          case LibFunc::fabsl:
            continue; // ISD::FABS is never a library call.
          case LibFunc::sqrt:
          case LibFunc::sqrtf:
          case LibFunc::sqrtl:
            Opcode = ISD::FSQRT; break;
          case LibFunc::floor:
          case LibFunc::floorf:
          case LibFunc::floorl:
            Opcode = ISD::FFLOOR; break;
          case LibFunc::nearbyint:
          case LibFunc::nearbyintf:
          case LibFunc::nearbyintl:
            Opcode = ISD::FNEARBYINT; break;
          case LibFunc::ceil:
          case LibFunc::ceilf:
          case LibFunc::ceill:
            Opcode = ISD::FCEIL; break;
          case LibFunc::rint:
          case LibFunc::rintf:
          case LibFunc::rintl:
            Opcode = ISD::FRINT; break;
          case LibFunc::round:
          case LibFunc::roundf:
          case LibFunc::roundl:
            Opcode = ISD::FROUND; break;
          case LibFunc::trunc:
          case LibFunc::truncf:
          case LibFunc::truncl:
            Opcode = ISD::FTRUNC; break;
          }

          // A recognized FP libcall stays a real call only when the matching
          // ISD node is not legal/custom for the argument's value type.
          MVT VTy =
            TLI->getSimpleValueType(CI->getArgOperand(0)->getType(), true);
          if (VTy == MVT::Other)
            return true;

          if (TLI->isOperationLegalOrCustom(Opcode, VTy))
            continue;
          else if (VTy.isVector() &&
                   TLI->isOperationLegalOrCustom(Opcode, VTy.getScalarType()))
            continue;

          return true;
        }
      }

      // Any other call (indirect, or a function we could not prove safe).
      return true;
    } else if (isa<BinaryOperator>(J) &&
               J->getType()->getScalarType()->isPPC_FP128Ty()) {
      // Most operations on ppc_f128 values become calls.
      return true;
    } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
               isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
      CastInst *CI = cast<CastInst>(J);
      // int<->fp conversions involving ppc_fp128 or wider-than-GPR integers
      // may be expanded to runtime-library calls.
      if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
          CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
          isLargeIntegerTy(TT.isArch32Bit(), CI->getSrcTy()->getScalarType()) ||
          isLargeIntegerTy(TT.isArch32Bit(), CI->getDestTy()->getScalarType()))
        return true;
    } else if (isLargeIntegerTy(TT.isArch32Bit(),
                                J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::UDiv ||
                J->getOpcode() == Instruction::SDiv ||
                J->getOpcode() == Instruction::URem ||
                J->getOpcode() == Instruction::SRem)) {
      // Over-wide division/remainder is expanded to a runtime call.
      return true;
    } else if (TT.isArch32Bit() &&
               isLargeIntegerTy(false, J->getType()->getScalarType()) &&
               (J->getOpcode() == Instruction::Shl ||
                J->getOpcode() == Instruction::AShr ||
                J->getOpcode() == Instruction::LShr)) {
      // Only on PPC32, for 128-bit integers (specifically not 64-bit
      // integers), these might be runtime calls.
      return true;
    } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
      // On PowerPC, indirect jumps use the counter register.
      return true;
    } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
      // A switch big enough to become a jump table lowers to an indirect
      // jump; without a TM we cannot tell, so be conservative.
      if (!TM)
        return true;
      const TargetLowering *TLI = TM->getSubtargetImpl()->getTargetLowering();

      if (TLI->supportJumpTables() &&
          SI->getNumCases()+1 >= (unsigned) TLI->getMinimumJumpTableEntries())
        return true;
    }
  }

  return false;
}
// Attempt to convert L into a CTR loop (recursing into nested loops first):
// find an exiting block with a usable, loop-invariant trip count that runs
// on every iteration, emit ppc_mtctr in the preheader, and replace that
// exit branch's condition with ppc_is_decremented_ctr_nonzero.
bool PPCCTRLoops::convertToCTRLoop(Loop *L) {
  bool MadeChange = false;

  Triple TT = Triple(L->getHeader()->getParent()->getParent()->
                     getTargetTriple());
  if (!TT.isArch32Bit() && !TT.isArch64Bit())
    return MadeChange; // Unknown arch. type.

  // Process nested loops first.
  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I) {
    MadeChange |= convertToCTRLoop(*I);
  }

  // If a nested loop has been converted, then we can't convert this loop.
  if (MadeChange)
    return MadeChange;

#ifndef NDEBUG
  // Stop trying after reaching the limit (if any).
  int Limit = CTRLoopLimit;
  if (Limit >= 0) {
    if (Counter >= CTRLoopLimit)
      return false;
    Counter++;
  }
#endif

  // We don't want to spill/restore the counter register, and so we don't
  // want to use the counter register if the loop contains calls.
  for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
       I != IE; ++I)
    if (mightUseCTR(TT, *I))
      return MadeChange;

  // Search the exiting blocks for one whose exit count SCEV is computable,
  // non-zero, loop-invariant, and fits in a GPR.
  SmallVector<BasicBlock*, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  BasicBlock *CountedExitBlock = nullptr;
  const SCEV *ExitCount = nullptr;
  BranchInst *CountedExitBranch = nullptr;
  for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
       IE = ExitingBlocks.end(); I != IE; ++I) {
    const SCEV *EC = SE->getExitCount(L, *I);
    DEBUG(dbgs() << "Exit Count for " << *L << " from block " <<
                    (*I)->getName() << ": " << *EC << "\n");
    if (isa<SCEVCouldNotCompute>(EC))
      continue;
    if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
      if (ConstEC->getValue()->isZero())
        continue;
    } else if (!SE->isLoopInvariant(EC, L))
      continue;

    // The count must be representable in a single counter register.
    if (SE->getTypeSizeInBits(EC->getType()) > (TT.isArch64Bit() ? 64 : 32))
      continue;

    // We now have a loop-invariant count of loop iterations (which is not the
    // constant zero) for which we know that this loop will not exit via this
    // exisiting block.

    // We need to make sure that this block will run on every loop iteration.
    // For this to be true, we must dominate all blocks with backedges. Such
    // blocks are in-loop predecessors to the header block.
    bool NotAlways = false;
    for (pred_iterator PI = pred_begin(L->getHeader()),
         PIE = pred_end(L->getHeader()); PI != PIE; ++PI) {
      if (!L->contains(*PI))
        continue;

      if (!DT->dominates(*I, *PI)) {
        NotAlways = true;
        break;
      }
    }

    if (NotAlways)
      continue;

    // Make sure this blocks ends with a conditional branch.
    Instruction *TI = (*I)->getTerminator();
    if (!TI)
      continue;

    if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
      if (!BI->isConditional())
        continue;

      CountedExitBranch = BI;
    } else
      continue;

    // Note that this block may not be the loop latch block, even if the loop
    // has a latch block.
    CountedExitBlock = *I;
    ExitCount = EC;
    break;
  }

  if (!CountedExitBlock)
    return MadeChange;

  BasicBlock *Preheader = L->getLoopPreheader();

  // If we don't have a preheader, then insert one. If we already have a
  // preheader, then we can use it (except if the preheader contains a use of
  // the CTR register because some such uses might be reordered by the
  // selection DAG after the mtctr instruction).
  if (!Preheader || mightUseCTR(TT, Preheader))
    Preheader = InsertPreheaderForLoop(L, this);
  if (!Preheader)
    return MadeChange;

  DEBUG(dbgs() << "Preheader for exit count: " << Preheader->getName() << "\n");

  // Insert the count into the preheader and replace the condition used by the
  // selected branch.
  MadeChange = true;

  SCEVExpander SCEVE(*SE, "loopcnt");
  LLVMContext &C = SE->getContext();
  Type *CountType = TT.isArch64Bit() ? Type::getInt64Ty(C) :
                                       Type::getInt32Ty(C);
  if (!ExitCount->getType()->isPointerTy() &&
      ExitCount->getType() != CountType)
    ExitCount = SE->getZeroExtendExpr(ExitCount, CountType);
  // The branch intrinsic tests the counter after decrementing it, so the
  // CTR is seeded with the exit count plus one.
  ExitCount = SE->getAddExpr(ExitCount,
                             SE->getConstant(CountType, 1));
  Value *ECValue = SCEVE.expandCodeFor(ExitCount, CountType,
                                       Preheader->getTerminator());

  IRBuilder<> CountBuilder(Preheader->getTerminator());
  Module *M = Preheader->getParent()->getParent();
  Value *MTCTRFunc = Intrinsic::getDeclaration(M, Intrinsic::ppc_mtctr,
                                               CountType);
  CountBuilder.CreateCall(MTCTRFunc, ECValue);

  IRBuilder<> CondBuilder(CountedExitBranch);
  Value *DecFunc =
    Intrinsic::getDeclaration(M, Intrinsic::ppc_is_decremented_ctr_nonzero);
  Value *NewCond = CondBuilder.CreateCall(DecFunc);
  Value *OldCond = CountedExitBranch->getCondition();
  CountedExitBranch->setCondition(NewCond);

  // The false branch must exit the loop.
  if (!L->contains(CountedExitBranch->getSuccessor(0)))
    CountedExitBranch->swapSuccessors();

  // The old condition may be dead now, and may have even created a dead PHI
  // (the original induction variable).
  RecursivelyDeleteTriviallyDeadInstructions(OldCond);
  DeleteDeadPHIs(CountedExitBlock);

  ++NumCTRLoops;
  return MadeChange;
}
  472. #ifndef NDEBUG
  473. static bool clobbersCTR(const MachineInstr *MI) {
  474. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
  475. const MachineOperand &MO = MI->getOperand(i);
  476. if (MO.isReg()) {
  477. if (MO.isDef() && (MO.getReg() == PPC::CTR || MO.getReg() == PPC::CTR8))
  478. return true;
  479. } else if (MO.isRegMask()) {
  480. if (MO.clobbersPhysReg(PPC::CTR) || MO.clobbersPhysReg(PPC::CTR8))
  481. return true;
  482. }
  483. }
  484. return false;
  485. }
// Walk backwards from the counter branch at I in MBB, across predecessor
// blocks, checking that every path reaches an MTCTRloop/MTCTR8loop before
// any instruction that clobbers the CTR. Implemented as an explicit
// worklist traversal using labels/gotos; Visited prevents revisiting blocks
// on cyclic CFGs. Returns false (with debug output) on a violating path.
static bool verifyCTRBranch(MachineBasicBlock *MBB,
                            MachineBasicBlock::iterator I) {
  MachineBasicBlock::iterator BI = I; // The branch being verified.
  SmallSet<MachineBasicBlock *, 16>   Visited;
  SmallVector<MachineBasicBlock *, 8> Preds;
  bool CheckPreds;

  // If the branch is the first instruction in its block, there is nothing
  // to scan here; go straight to the predecessors.
  if (I == MBB->begin()) {
    Visited.insert(MBB);
    goto queue_preds;
  } else
    --I;

check_block:
  Visited.insert(MBB);
  if (I == MBB->end())
    goto queue_preds;

  // Scan backwards from I to the start of the block.
  CheckPreds = true;
  for (MachineBasicBlock::iterator IE = MBB->begin();; --I) {
    unsigned Opc = I->getOpcode();
    if (Opc == PPC::MTCTRloop || Opc == PPC::MTCTR8loop) {
      // Found the CTR setup on this path; predecessors need not be checked.
      CheckPreds = false;
      break;
    }

    if (I != BI && clobbersCTR(I)) {
      DEBUG(dbgs() << "BB#" << MBB->getNumber() << " (" <<
                      MBB->getFullName() << ") instruction " << *I <<
                      " clobbers CTR, invalidating " << "BB#" <<
                      BI->getParent()->getNumber() << " (" <<
                      BI->getParent()->getFullName() << ") instruction " <<
                      *BI << "\n");
      return false;
    }

    if (I == IE)
      break;
  }

  // If this path found its MTCTR and no other paths remain queued, we're
  // done successfully.
  if (!CheckPreds && Preds.empty())
    return true;

  if (CheckPreds) {
queue_preds:
    // Reaching the function entry block without an MTCTR is a failure.
    if (MachineFunction::iterator(MBB) == MBB->getParent()->begin()) {
      DEBUG(dbgs() << "Unable to find a MTCTR instruction for BB#" <<
                      BI->getParent()->getNumber() << " (" <<
                      BI->getParent()->getFullName() << ") instruction " <<
                      *BI << "\n");
      return false;
    }

    for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
         PIE = MBB->pred_end(); PI != PIE; ++PI)
      Preds.push_back(*PI);
  }

  // Pop the next unvisited predecessor and scan it from its last
  // non-debug instruction.
  do {
    MBB = Preds.pop_back_val();
    if (!Visited.count(MBB)) {
      I = MBB->getLastNonDebugInstr();
      goto check_block;
    }
  } while (!Preds.empty());

  return true;
}
  544. bool PPCCTRLoopsVerify::runOnMachineFunction(MachineFunction &MF) {
  545. MDT = &getAnalysis<MachineDominatorTree>();
  546. // Verify that all bdnz/bdz instructions are dominated by a loop mtctr before
  547. // any other instructions that might clobber the ctr register.
  548. for (MachineFunction::iterator I = MF.begin(), IE = MF.end();
  549. I != IE; ++I) {
  550. MachineBasicBlock *MBB = I;
  551. if (!MDT->isReachableFromEntry(MBB))
  552. continue;
  553. for (MachineBasicBlock::iterator MII = MBB->getFirstTerminator(),
  554. MIIE = MBB->end(); MII != MIIE; ++MII) {
  555. unsigned Opc = MII->getOpcode();
  556. if (Opc == PPC::BDNZ8 || Opc == PPC::BDNZ ||
  557. Opc == PPC::BDZ8 || Opc == PPC::BDZ)
  558. if (!verifyCTRBranch(MBB, MII))
  559. llvm_unreachable("Invalid PPC CTR loop!");
  560. }
  561. }
  562. return false;
  563. }
  564. #endif // NDEBUG