ScalarEvolutionExpander.cpp 73 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860
  1. //===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis --*- C++ -*-===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file contains the implementation of the scalar evolution expander,
  11. // which is used to generate the code corresponding to a given scalar evolution
  12. // expression.
  13. //
  14. //===----------------------------------------------------------------------===//
  15. #include "llvm/Analysis/ScalarEvolutionExpander.h"
  16. #include "llvm/ADT/STLExtras.h"
  17. #include "llvm/ADT/SmallSet.h"
  18. #include "llvm/Analysis/LoopInfo.h"
  19. #include "llvm/Analysis/TargetTransformInfo.h"
  20. #include "llvm/IR/DataLayout.h"
  21. #include "llvm/IR/Dominators.h"
  22. #include "llvm/IR/IntrinsicInst.h"
  23. #include "llvm/IR/LLVMContext.h"
  24. #include "llvm/Support/Debug.h"
  25. using namespace llvm;
  26. /// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
  27. /// reusing an existing cast if a suitable one exists, moving an existing
  28. /// cast if a suitable one exists but isn't in the right place, or
  29. /// creating a new one.
  30. Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
  31. Instruction::CastOps Op,
  32. BasicBlock::iterator IP) {
  33. // This function must be called with the builder having a valid insertion
  34. // point. It doesn't need to be the actual IP where the uses of the returned
  35. // cast will be added, but it must dominate such IP.
  36. // We use this precondition to produce a cast that will dominate all its
  37. // uses. In particular, this is crucial for the case where the builder's
  38. // insertion point *is* the point where we were asked to put the cast.
  39. // Since we don't know the builder's insertion point is actually
  40. // where the uses will be added (only that it dominates it), we are
  41. // not allowed to move it.
  42. BasicBlock::iterator BIP = Builder.GetInsertPoint();
  43. Instruction *Ret = NULL;
  44. // Check to see if there is already a cast!
  45. for (Value::use_iterator UI = V->use_begin(), E = V->use_end();
  46. UI != E; ++UI) {
  47. User *U = *UI;
  48. if (U->getType() == Ty)
  49. if (CastInst *CI = dyn_cast<CastInst>(U))
  50. if (CI->getOpcode() == Op) {
  51. // If the cast isn't where we want it, create a new cast at IP.
  52. // Likewise, do not reuse a cast at BIP because it must dominate
  53. // instructions that might be inserted before BIP.
  54. if (BasicBlock::iterator(CI) != IP || BIP == IP) {
  55. // Create a new cast, and leave the old cast in place in case
  56. // it is being used as an insert point. Clear its operand
  57. // so that it doesn't hold anything live.
  58. Ret = CastInst::Create(Op, V, Ty, "", IP);
  59. Ret->takeName(CI);
  60. CI->replaceAllUsesWith(Ret);
  61. CI->setOperand(0, UndefValue::get(V->getType()));
  62. break;
  63. }
  64. Ret = CI;
  65. break;
  66. }
  67. }
  68. // Create a new cast.
  69. if (!Ret)
  70. Ret = CastInst::Create(Op, V, Ty, V->getName(), IP);
  71. // We assert at the end of the function since IP might point to an
  72. // instruction with different dominance properties than a cast
  73. // (an invoke for example) and not dominate BIP (but the cast does).
  74. assert(SE.DT->dominates(Ret, BIP));
  75. rememberInstruction(Ret);
  76. return Ret;
  77. }
  78. /// InsertNoopCastOfTo - Insert a cast of V to the specified type,
  79. /// which must be possible with a noop cast, doing what we can to share
  80. /// the casts.
  81. Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
  82. Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
  83. assert((Op == Instruction::BitCast ||
  84. Op == Instruction::PtrToInt ||
  85. Op == Instruction::IntToPtr) &&
  86. "InsertNoopCastOfTo cannot perform non-noop casts!");
  87. assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
  88. "InsertNoopCastOfTo cannot change sizes!");
  89. // Short-circuit unnecessary bitcasts.
  90. if (Op == Instruction::BitCast) {
  91. if (V->getType() == Ty)
  92. return V;
  93. if (CastInst *CI = dyn_cast<CastInst>(V)) {
  94. if (CI->getOperand(0)->getType() == Ty)
  95. return CI->getOperand(0);
  96. }
  97. }
  98. // Short-circuit unnecessary inttoptr<->ptrtoint casts.
  99. if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
  100. SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
  101. if (CastInst *CI = dyn_cast<CastInst>(V))
  102. if ((CI->getOpcode() == Instruction::PtrToInt ||
  103. CI->getOpcode() == Instruction::IntToPtr) &&
  104. SE.getTypeSizeInBits(CI->getType()) ==
  105. SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
  106. return CI->getOperand(0);
  107. if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
  108. if ((CE->getOpcode() == Instruction::PtrToInt ||
  109. CE->getOpcode() == Instruction::IntToPtr) &&
  110. SE.getTypeSizeInBits(CE->getType()) ==
  111. SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
  112. return CE->getOperand(0);
  113. }
  114. // Fold a cast of a constant.
  115. if (Constant *C = dyn_cast<Constant>(V))
  116. return ConstantExpr::getCast(Op, C, Ty);
  117. // Cast the argument at the beginning of the entry block, after
  118. // any bitcasts of other arguments.
  119. if (Argument *A = dyn_cast<Argument>(V)) {
  120. BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
  121. while ((isa<BitCastInst>(IP) &&
  122. isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
  123. cast<BitCastInst>(IP)->getOperand(0) != A) ||
  124. isa<DbgInfoIntrinsic>(IP) ||
  125. isa<LandingPadInst>(IP))
  126. ++IP;
  127. return ReuseOrCreateCast(A, Ty, Op, IP);
  128. }
  129. // Cast the instruction immediately after the instruction.
  130. Instruction *I = cast<Instruction>(V);
  131. BasicBlock::iterator IP = I; ++IP;
  132. if (InvokeInst *II = dyn_cast<InvokeInst>(I))
  133. IP = II->getNormalDest()->begin();
  134. while (isa<PHINode>(IP) || isa<LandingPadInst>(IP))
  135. ++IP;
  136. return ReuseOrCreateCast(I, Ty, Op, IP);
  137. }
  138. /// InsertBinop - Insert the specified binary operator, doing a small amount
  139. /// of work to avoid inserting an obviously redundant operation.
  140. Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
  141. Value *LHS, Value *RHS) {
  142. // Fold a binop with constant operands.
  143. if (Constant *CLHS = dyn_cast<Constant>(LHS))
  144. if (Constant *CRHS = dyn_cast<Constant>(RHS))
  145. return ConstantExpr::get(Opcode, CLHS, CRHS);
  146. // Do a quick scan to see if we have this binop nearby. If so, reuse it.
  147. unsigned ScanLimit = 6;
  148. BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
  149. // Scanning starts from the last instruction before the insertion point.
  150. BasicBlock::iterator IP = Builder.GetInsertPoint();
  151. if (IP != BlockBegin) {
  152. --IP;
  153. for (; ScanLimit; --IP, --ScanLimit) {
  154. // Don't count dbg.value against the ScanLimit, to avoid perturbing the
  155. // generated code.
  156. if (isa<DbgInfoIntrinsic>(IP))
  157. ScanLimit++;
  158. if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
  159. IP->getOperand(1) == RHS)
  160. return IP;
  161. if (IP == BlockBegin) break;
  162. }
  163. }
  164. // Save the original insertion point so we can restore it when we're done.
  165. DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
  166. BuilderType::InsertPointGuard Guard(Builder);
  167. // Move the insertion point out of as many loops as we can.
  168. while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
  169. if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
  170. BasicBlock *Preheader = L->getLoopPreheader();
  171. if (!Preheader) break;
  172. // Ok, move up a level.
  173. Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
  174. }
  175. // If we haven't found this binop, insert it.
  176. Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS));
  177. BO->setDebugLoc(Loc);
  178. rememberInstruction(BO);
  179. return BO;
  180. }
  181. /// FactorOutConstant - Test if S is divisible by Factor, using signed
  182. /// division. If so, update S with Factor divided out and return true.
  183. /// S need not be evenly divisible if a reasonable remainder can be
  184. /// computed.
  185. /// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made
  186. /// unnecessary; in its place, just signed-divide Ops[i] by the scale and
  187. /// check to see if the divide was folded.
  188. static bool FactorOutConstant(const SCEV *&S,
  189. const SCEV *&Remainder,
  190. const SCEV *Factor,
  191. ScalarEvolution &SE,
  192. const DataLayout *DL) {
  193. // Everything is divisible by one.
  194. if (Factor->isOne())
  195. return true;
  196. // x/x == 1.
  197. if (S == Factor) {
  198. S = SE.getConstant(S->getType(), 1);
  199. return true;
  200. }
  201. // For a Constant, check for a multiple of the given factor.
  202. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
  203. // 0/x == 0.
  204. if (C->isZero())
  205. return true;
  206. // Check for divisibility.
  207. if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {
  208. ConstantInt *CI =
  209. ConstantInt::get(SE.getContext(),
  210. C->getValue()->getValue().sdiv(
  211. FC->getValue()->getValue()));
  212. // If the quotient is zero and the remainder is non-zero, reject
  213. // the value at this scale. It will be considered for subsequent
  214. // smaller scales.
  215. if (!CI->isZero()) {
  216. const SCEV *Div = SE.getConstant(CI);
  217. S = Div;
  218. Remainder =
  219. SE.getAddExpr(Remainder,
  220. SE.getConstant(C->getValue()->getValue().srem(
  221. FC->getValue()->getValue())));
  222. return true;
  223. }
  224. }
  225. }
  226. // In a Mul, check if there is a constant operand which is a multiple
  227. // of the given factor.
  228. if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
  229. if (DL) {
  230. // With DataLayout, the size is known. Check if there is a constant
  231. // operand which is a multiple of the given factor. If so, we can
  232. // factor it.
  233. const SCEVConstant *FC = cast<SCEVConstant>(Factor);
  234. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
  235. if (!C->getValue()->getValue().srem(FC->getValue()->getValue())) {
  236. SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
  237. NewMulOps[0] =
  238. SE.getConstant(C->getValue()->getValue().sdiv(
  239. FC->getValue()->getValue()));
  240. S = SE.getMulExpr(NewMulOps);
  241. return true;
  242. }
  243. } else {
  244. // Without DataLayout, check if Factor can be factored out of any of the
  245. // Mul's operands. If so, we can just remove it.
  246. for (unsigned i = 0, e = M->getNumOperands(); i != e; ++i) {
  247. const SCEV *SOp = M->getOperand(i);
  248. const SCEV *Remainder = SE.getConstant(SOp->getType(), 0);
  249. if (FactorOutConstant(SOp, Remainder, Factor, SE, DL) &&
  250. Remainder->isZero()) {
  251. SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
  252. NewMulOps[i] = SOp;
  253. S = SE.getMulExpr(NewMulOps);
  254. return true;
  255. }
  256. }
  257. }
  258. }
  259. // In an AddRec, check if both start and step are divisible.
  260. if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
  261. const SCEV *Step = A->getStepRecurrence(SE);
  262. const SCEV *StepRem = SE.getConstant(Step->getType(), 0);
  263. if (!FactorOutConstant(Step, StepRem, Factor, SE, DL))
  264. return false;
  265. if (!StepRem->isZero())
  266. return false;
  267. const SCEV *Start = A->getStart();
  268. if (!FactorOutConstant(Start, Remainder, Factor, SE, DL))
  269. return false;
  270. S = SE.getAddRecExpr(Start, Step, A->getLoop(),
  271. A->getNoWrapFlags(SCEV::FlagNW));
  272. return true;
  273. }
  274. return false;
  275. }
  276. /// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs
  277. /// is the number of SCEVAddRecExprs present, which are kept at the end of
  278. /// the list.
  279. ///
  280. static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
  281. Type *Ty,
  282. ScalarEvolution &SE) {
  283. unsigned NumAddRecs = 0;
  284. for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i)
  285. ++NumAddRecs;
  286. // Group Ops into non-addrecs and addrecs.
  287. SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs);
  288. SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end());
  289. // Let ScalarEvolution sort and simplify the non-addrecs list.
  290. const SCEV *Sum = NoAddRecs.empty() ?
  291. SE.getConstant(Ty, 0) :
  292. SE.getAddExpr(NoAddRecs);
  293. // If it returned an add, use the operands. Otherwise it simplified
  294. // the sum into a single value, so just use that.
  295. Ops.clear();
  296. if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum))
  297. Ops.append(Add->op_begin(), Add->op_end());
  298. else if (!Sum->isZero())
  299. Ops.push_back(Sum);
  300. // Then append the addrecs.
  301. Ops.append(AddRecs.begin(), AddRecs.end());
  302. }
  303. /// SplitAddRecs - Flatten a list of add operands, moving addrec start values
  304. /// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,d}.
  305. /// This helps expose more opportunities for folding parts of the expressions
  306. /// into GEP indices.
  307. ///
  308. static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
  309. Type *Ty,
  310. ScalarEvolution &SE) {
  311. // Find the addrecs.
  312. SmallVector<const SCEV *, 8> AddRecs;
  313. for (unsigned i = 0, e = Ops.size(); i != e; ++i)
  314. while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) {
  315. const SCEV *Start = A->getStart();
  316. if (Start->isZero()) break;
  317. const SCEV *Zero = SE.getConstant(Ty, 0);
  318. AddRecs.push_back(SE.getAddRecExpr(Zero,
  319. A->getStepRecurrence(SE),
  320. A->getLoop(),
  321. A->getNoWrapFlags(SCEV::FlagNW)));
  322. if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
  323. Ops[i] = Zero;
  324. Ops.append(Add->op_begin(), Add->op_end());
  325. e += Add->getNumOperands();
  326. } else {
  327. Ops[i] = Start;
  328. }
  329. }
  330. if (!AddRecs.empty()) {
  331. // Add the addrecs onto the end of the list.
  332. Ops.append(AddRecs.begin(), AddRecs.end());
  333. // Resort the operand list, moving any constants to the front.
  334. SimplifyAddOperands(Ops, Ty, SE);
  335. }
  336. }
  337. /// expandAddToGEP - Expand an addition expression with a pointer type into
  338. /// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
  339. /// BasicAliasAnalysis and other passes analyze the result. See the rules
  340. /// for getelementptr vs. inttoptr in
  341. /// http://llvm.org/docs/LangRef.html#pointeraliasing
  342. /// for details.
  343. ///
  344. /// Design note: The correctness of using getelementptr here depends on
  345. /// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
  346. /// they may introduce pointer arithmetic which may not be safely converted
  347. /// into getelementptr.
  348. ///
  349. /// Design note: It might seem desirable for this function to be more
  350. /// loop-aware. If some of the indices are loop-invariant while others
  351. /// aren't, it might seem desirable to emit multiple GEPs, keeping the
  352. /// loop-invariant portions of the overall computation outside the loop.
  353. /// However, there are a few reasons this is not done here. Hoisting simple
  354. /// arithmetic is a low-level optimization that often isn't very
  355. /// important until late in the optimization process. In fact, passes
  356. /// like InstructionCombining will combine GEPs, even if it means
  357. /// pushing loop-invariant computation down into loops, so even if the
  358. /// GEPs were split here, the work would quickly be undone. The
  359. /// LoopStrengthReduction pass, which is usually run quite late (and
  360. /// after the last InstructionCombining pass), takes care of hoisting
  361. /// loop-invariant portions of expressions, after considering what
  362. /// can be folded using target addressing modes.
  363. ///
  364. Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
  365. const SCEV *const *op_end,
  366. PointerType *PTy,
  367. Type *Ty,
  368. Value *V) {
  369. Type *ElTy = PTy->getElementType();
  370. SmallVector<Value *, 4> GepIndices;
  371. SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
  372. bool AnyNonZeroIndices = false;
  373. // Split AddRecs up into parts as either of the parts may be usable
  374. // without the other.
  375. SplitAddRecs(Ops, Ty, SE);
  376. Type *IntPtrTy = SE.DL
  377. ? SE.DL->getIntPtrType(PTy)
  378. : Type::getInt64Ty(PTy->getContext());
  379. // Descend down the pointer's type and attempt to convert the other
  380. // operands into GEP indices, at each level. The first index in a GEP
  381. // indexes into the array implied by the pointer operand; the rest of
  382. // the indices index into the element or field type selected by the
  383. // preceding index.
  384. for (;;) {
  385. // If the scale size is not 0, attempt to factor out a scale for
  386. // array indexing.
  387. SmallVector<const SCEV *, 8> ScaledOps;
  388. if (ElTy->isSized()) {
  389. const SCEV *ElSize = SE.getSizeOfExpr(IntPtrTy, ElTy);
  390. if (!ElSize->isZero()) {
  391. SmallVector<const SCEV *, 8> NewOps;
  392. for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
  393. const SCEV *Op = Ops[i];
  394. const SCEV *Remainder = SE.getConstant(Ty, 0);
  395. if (FactorOutConstant(Op, Remainder, ElSize, SE, SE.DL)) {
  396. // Op now has ElSize factored out.
  397. ScaledOps.push_back(Op);
  398. if (!Remainder->isZero())
  399. NewOps.push_back(Remainder);
  400. AnyNonZeroIndices = true;
  401. } else {
  402. // The operand was not divisible, so add it to the list of operands
  403. // we'll scan next iteration.
  404. NewOps.push_back(Ops[i]);
  405. }
  406. }
  407. // If we made any changes, update Ops.
  408. if (!ScaledOps.empty()) {
  409. Ops = NewOps;
  410. SimplifyAddOperands(Ops, Ty, SE);
  411. }
  412. }
  413. }
  414. // Record the scaled array index for this level of the type. If
  415. // we didn't find any operands that could be factored, tentatively
  416. // assume that element zero was selected (since the zero offset
  417. // would obviously be folded away).
  418. Value *Scaled = ScaledOps.empty() ?
  419. Constant::getNullValue(Ty) :
  420. expandCodeFor(SE.getAddExpr(ScaledOps), Ty);
  421. GepIndices.push_back(Scaled);
  422. // Collect struct field index operands.
  423. while (StructType *STy = dyn_cast<StructType>(ElTy)) {
  424. bool FoundFieldNo = false;
  425. // An empty struct has no fields.
  426. if (STy->getNumElements() == 0) break;
  427. if (SE.DL) {
  428. // With DataLayout, field offsets are known. See if a constant offset
  429. // falls within any of the struct fields.
  430. if (Ops.empty()) break;
  431. if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
  432. if (SE.getTypeSizeInBits(C->getType()) <= 64) {
  433. const StructLayout &SL = *SE.DL->getStructLayout(STy);
  434. uint64_t FullOffset = C->getValue()->getZExtValue();
  435. if (FullOffset < SL.getSizeInBytes()) {
  436. unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
  437. GepIndices.push_back(
  438. ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
  439. ElTy = STy->getTypeAtIndex(ElIdx);
  440. Ops[0] =
  441. SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
  442. AnyNonZeroIndices = true;
  443. FoundFieldNo = true;
  444. }
  445. }
  446. } else {
  447. // Without DataLayout, just check for an offsetof expression of the
  448. // appropriate struct type.
  449. for (unsigned i = 0, e = Ops.size(); i != e; ++i)
  450. if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Ops[i])) {
  451. Type *CTy;
  452. Constant *FieldNo;
  453. if (U->isOffsetOf(CTy, FieldNo) && CTy == STy) {
  454. GepIndices.push_back(FieldNo);
  455. ElTy =
  456. STy->getTypeAtIndex(cast<ConstantInt>(FieldNo)->getZExtValue());
  457. Ops[i] = SE.getConstant(Ty, 0);
  458. AnyNonZeroIndices = true;
  459. FoundFieldNo = true;
  460. break;
  461. }
  462. }
  463. }
  464. // If no struct field offsets were found, tentatively assume that
  465. // field zero was selected (since the zero offset would obviously
  466. // be folded away).
  467. if (!FoundFieldNo) {
  468. ElTy = STy->getTypeAtIndex(0u);
  469. GepIndices.push_back(
  470. Constant::getNullValue(Type::getInt32Ty(Ty->getContext())));
  471. }
  472. }
  473. if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
  474. ElTy = ATy->getElementType();
  475. else
  476. break;
  477. }
  478. // If none of the operands were convertible to proper GEP indices, cast
  479. // the base to i8* and do an ugly getelementptr with that. It's still
  480. // better than ptrtoint+arithmetic+inttoptr at least.
  481. if (!AnyNonZeroIndices) {
  482. // Cast the base to i8*.
  483. V = InsertNoopCastOfTo(V,
  484. Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));
  485. assert(!isa<Instruction>(V) ||
  486. SE.DT->dominates(cast<Instruction>(V), Builder.GetInsertPoint()));
  487. // Expand the operands for a plain byte offset.
  488. Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);
  489. // Fold a GEP with constant operands.
  490. if (Constant *CLHS = dyn_cast<Constant>(V))
  491. if (Constant *CRHS = dyn_cast<Constant>(Idx))
  492. return ConstantExpr::getGetElementPtr(CLHS, CRHS);
  493. // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
  494. unsigned ScanLimit = 6;
  495. BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
  496. // Scanning starts from the last instruction before the insertion point.
  497. BasicBlock::iterator IP = Builder.GetInsertPoint();
  498. if (IP != BlockBegin) {
  499. --IP;
  500. for (; ScanLimit; --IP, --ScanLimit) {
  501. // Don't count dbg.value against the ScanLimit, to avoid perturbing the
  502. // generated code.
  503. if (isa<DbgInfoIntrinsic>(IP))
  504. ScanLimit++;
  505. if (IP->getOpcode() == Instruction::GetElementPtr &&
  506. IP->getOperand(0) == V && IP->getOperand(1) == Idx)
  507. return IP;
  508. if (IP == BlockBegin) break;
  509. }
  510. }
  511. // Save the original insertion point so we can restore it when we're done.
  512. BuilderType::InsertPointGuard Guard(Builder);
  513. // Move the insertion point out of as many loops as we can.
  514. while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
  515. if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
  516. BasicBlock *Preheader = L->getLoopPreheader();
  517. if (!Preheader) break;
  518. // Ok, move up a level.
  519. Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
  520. }
  521. // Emit a GEP.
  522. Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
  523. rememberInstruction(GEP);
  524. return GEP;
  525. }
  526. // Save the original insertion point so we can restore it when we're done.
  527. BuilderType::InsertPoint SaveInsertPt = Builder.saveIP();
  528. // Move the insertion point out of as many loops as we can.
  529. while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
  530. if (!L->isLoopInvariant(V)) break;
  531. bool AnyIndexNotLoopInvariant = false;
  532. for (SmallVectorImpl<Value *>::const_iterator I = GepIndices.begin(),
  533. E = GepIndices.end(); I != E; ++I)
  534. if (!L->isLoopInvariant(*I)) {
  535. AnyIndexNotLoopInvariant = true;
  536. break;
  537. }
  538. if (AnyIndexNotLoopInvariant)
  539. break;
  540. BasicBlock *Preheader = L->getLoopPreheader();
  541. if (!Preheader) break;
  542. // Ok, move up a level.
  543. Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
  544. }
  545. // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
  546. // because ScalarEvolution may have changed the address arithmetic to
  547. // compute a value which is beyond the end of the allocated object.
  548. Value *Casted = V;
  549. if (V->getType() != PTy)
  550. Casted = InsertNoopCastOfTo(Casted, PTy);
  551. Value *GEP = Builder.CreateGEP(Casted,
  552. GepIndices,
  553. "scevgep");
  554. Ops.push_back(SE.getUnknown(GEP));
  555. rememberInstruction(GEP);
  556. // Restore the original insert point.
  557. Builder.restoreIP(SaveInsertPt);
  558. return expand(SE.getAddExpr(Ops));
  559. }
  560. /// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
  561. /// SCEV expansion. If they are nested, this is the most nested. If they are
  562. /// neighboring, pick the later.
  563. static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
  564. DominatorTree &DT) {
  565. if (!A) return B;
  566. if (!B) return A;
  567. if (A->contains(B)) return B;
  568. if (B->contains(A)) return A;
  569. if (DT.dominates(A->getHeader(), B->getHeader())) return B;
  570. if (DT.dominates(B->getHeader(), A->getHeader())) return A;
  571. return A; // Arbitrarily break the tie.
  572. }
  573. /// getRelevantLoop - Get the most relevant loop associated with the given
  574. /// expression, according to PickMostRelevantLoop.
  575. const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
  576. // Test whether we've already computed the most relevant loop for this SCEV.
  577. std::pair<DenseMap<const SCEV *, const Loop *>::iterator, bool> Pair =
  578. RelevantLoops.insert(std::make_pair(S, static_cast<const Loop *>(0)));
  579. if (!Pair.second)
  580. return Pair.first->second;
  581. if (isa<SCEVConstant>(S))
  582. // A constant has no relevant loops.
  583. return 0;
  584. if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
  585. if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
  586. return Pair.first->second = SE.LI->getLoopFor(I->getParent());
  587. // A non-instruction has no relevant loops.
  588. return 0;
  589. }
  590. if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
  591. const Loop *L = 0;
  592. if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
  593. L = AR->getLoop();
  594. for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end();
  595. I != E; ++I)
  596. L = PickMostRelevantLoop(L, getRelevantLoop(*I), *SE.DT);
  597. return RelevantLoops[N] = L;
  598. }
  599. if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) {
  600. const Loop *Result = getRelevantLoop(C->getOperand());
  601. return RelevantLoops[C] = Result;
  602. }
  603. if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
  604. const Loop *Result =
  605. PickMostRelevantLoop(getRelevantLoop(D->getLHS()),
  606. getRelevantLoop(D->getRHS()),
  607. *SE.DT);
  608. return RelevantLoops[D] = Result;
  609. }
  610. llvm_unreachable("Unexpected SCEV type!");
  611. }
  612. namespace {
  613. /// LoopCompare - Compare loops by PickMostRelevantLoop.
  614. class LoopCompare {
  615. DominatorTree &DT;
  616. public:
  617. explicit LoopCompare(DominatorTree &dt) : DT(dt) {}
  618. bool operator()(std::pair<const Loop *, const SCEV *> LHS,
  619. std::pair<const Loop *, const SCEV *> RHS) const {
  620. // Keep pointer operands sorted at the end.
  621. if (LHS.second->getType()->isPointerTy() !=
  622. RHS.second->getType()->isPointerTy())
  623. return LHS.second->getType()->isPointerTy();
  624. // Compare loops with PickMostRelevantLoop.
  625. if (LHS.first != RHS.first)
  626. return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;
  627. // If one operand is a non-constant negative and the other is not,
  628. // put the non-constant negative on the right so that a sub can
  629. // be used instead of a negate and add.
  630. if (LHS.second->isNonConstantNegative()) {
  631. if (!RHS.second->isNonConstantNegative())
  632. return false;
  633. } else if (RHS.second->isNonConstantNegative())
  634. return true;
  635. // Otherwise they are equivalent according to this comparison.
  636. return false;
  637. }
  638. };
  639. }
  640. Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
  641. Type *Ty = SE.getEffectiveSCEVType(S->getType());
  642. // Collect all the add operands in a loop, along with their associated loops.
  643. // Iterate in reverse so that constants are emitted last, all else equal, and
  644. // so that pointer operands are inserted first, which the code below relies on
  645. // to form more involved GEPs.
  646. SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  647. for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()),
  648. E(S->op_begin()); I != E; ++I)
  649. OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
  650. // Sort by loop. Use a stable sort so that constants follow non-constants and
  651. // pointer operands precede non-pointer operands.
  652. std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
  653. // Emit instructions to add all the operands. Hoist as much as possible
  654. // out of loops, and form meaningful getelementptrs where possible.
  655. Value *Sum = 0;
  656. for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
  657. I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
  658. const Loop *CurLoop = I->first;
  659. const SCEV *Op = I->second;
  660. if (!Sum) {
  661. // This is the first operand. Just expand it.
  662. Sum = expand(Op);
  663. ++I;
  664. } else if (PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
  665. // The running sum expression is a pointer. Try to form a getelementptr
  666. // at this level with that as the base.
  667. SmallVector<const SCEV *, 4> NewOps;
  668. for (; I != E && I->first == CurLoop; ++I) {
  669. // If the operand is SCEVUnknown and not instructions, peek through
  670. // it, to enable more of it to be folded into the GEP.
  671. const SCEV *X = I->second;
  672. if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
  673. if (!isa<Instruction>(U->getValue()))
  674. X = SE.getSCEV(U->getValue());
  675. NewOps.push_back(X);
  676. }
  677. Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
  678. } else if (PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
  679. // The running sum is an integer, and there's a pointer at this level.
  680. // Try to form a getelementptr. If the running sum is instructions,
  681. // use a SCEVUnknown to avoid re-analyzing them.
  682. SmallVector<const SCEV *, 4> NewOps;
  683. NewOps.push_back(isa<Instruction>(Sum) ? SE.getUnknown(Sum) :
  684. SE.getSCEV(Sum));
  685. for (++I; I != E && I->first == CurLoop; ++I)
  686. NewOps.push_back(I->second);
  687. Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
  688. } else if (Op->isNonConstantNegative()) {
  689. // Instead of doing a negate and add, just do a subtract.
  690. Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
  691. Sum = InsertNoopCastOfTo(Sum, Ty);
  692. Sum = InsertBinop(Instruction::Sub, Sum, W);
  693. ++I;
  694. } else {
  695. // A simple add.
  696. Value *W = expandCodeFor(Op, Ty);
  697. Sum = InsertNoopCastOfTo(Sum, Ty);
  698. // Canonicalize a constant to the RHS.
  699. if (isa<Constant>(Sum)) std::swap(Sum, W);
  700. Sum = InsertBinop(Instruction::Add, Sum, W);
  701. ++I;
  702. }
  703. }
  704. return Sum;
  705. }
  706. Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
  707. Type *Ty = SE.getEffectiveSCEVType(S->getType());
  708. // Collect all the mul operands in a loop, along with their associated loops.
  709. // Iterate in reverse so that constants are emitted last, all else equal.
  710. SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  711. for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()),
  712. E(S->op_begin()); I != E; ++I)
  713. OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));
  714. // Sort by loop. Use a stable sort so that constants follow non-constants.
  715. std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
  716. // Emit instructions to mul all the operands. Hoist as much as possible
  717. // out of loops.
  718. Value *Prod = 0;
  719. for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
  720. I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
  721. const SCEV *Op = I->second;
  722. if (!Prod) {
  723. // This is the first operand. Just expand it.
  724. Prod = expand(Op);
  725. ++I;
  726. } else if (Op->isAllOnesValue()) {
  727. // Instead of doing a multiply by negative one, just do a negate.
  728. Prod = InsertNoopCastOfTo(Prod, Ty);
  729. Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod);
  730. ++I;
  731. } else {
  732. // A simple mul.
  733. Value *W = expandCodeFor(Op, Ty);
  734. Prod = InsertNoopCastOfTo(Prod, Ty);
  735. // Canonicalize a constant to the RHS.
  736. if (isa<Constant>(Prod)) std::swap(Prod, W);
  737. Prod = InsertBinop(Instruction::Mul, Prod, W);
  738. ++I;
  739. }
  740. }
  741. return Prod;
  742. }
  743. Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
  744. Type *Ty = SE.getEffectiveSCEVType(S->getType());
  745. Value *LHS = expandCodeFor(S->getLHS(), Ty);
  746. if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
  747. const APInt &RHS = SC->getValue()->getValue();
  748. if (RHS.isPowerOf2())
  749. return InsertBinop(Instruction::LShr, LHS,
  750. ConstantInt::get(Ty, RHS.logBase2()));
  751. }
  752. Value *RHS = expandCodeFor(S->getRHS(), Ty);
  753. return InsertBinop(Instruction::UDiv, LHS, RHS);
  754. }
  755. /// Move parts of Base into Rest to leave Base with the minimal
  756. /// expression that provides a pointer operand suitable for a
  757. /// GEP expansion.
  758. static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
  759. ScalarEvolution &SE) {
  760. while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) {
  761. Base = A->getStart();
  762. Rest = SE.getAddExpr(Rest,
  763. SE.getAddRecExpr(SE.getConstant(A->getType(), 0),
  764. A->getStepRecurrence(SE),
  765. A->getLoop(),
  766. A->getNoWrapFlags(SCEV::FlagNW)));
  767. }
  768. if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) {
  769. Base = A->getOperand(A->getNumOperands()-1);
  770. SmallVector<const SCEV *, 8> NewAddOps(A->op_begin(), A->op_end());
  771. NewAddOps.back() = Rest;
  772. Rest = SE.getAddExpr(NewAddOps);
  773. ExposePointerBase(Base, Rest, SE);
  774. }
  775. }
  776. /// Determine if this is a well-behaved chain of instructions leading back to
  777. /// the PHI. If so, it may be reused by expanded expressions.
  778. bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
  779. const Loop *L) {
  780. if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
  781. (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV)))
  782. return false;
  783. // If any of the operands don't dominate the insert position, bail.
  784. // Addrec operands are always loop-invariant, so this can only happen
  785. // if there are instructions which haven't been hoisted.
  786. if (L == IVIncInsertLoop) {
  787. for (User::op_iterator OI = IncV->op_begin()+1,
  788. OE = IncV->op_end(); OI != OE; ++OI)
  789. if (Instruction *OInst = dyn_cast<Instruction>(OI))
  790. if (!SE.DT->dominates(OInst, IVIncInsertPos))
  791. return false;
  792. }
  793. // Advance to the next instruction.
  794. IncV = dyn_cast<Instruction>(IncV->getOperand(0));
  795. if (!IncV)
  796. return false;
  797. if (IncV->mayHaveSideEffects())
  798. return false;
  799. if (IncV != PN)
  800. return true;
  801. return isNormalAddRecExprPHI(PN, IncV, L);
  802. }
  803. /// getIVIncOperand returns an induction variable increment's induction
  804. /// variable operand.
  805. ///
  806. /// If allowScale is set, any type of GEP is allowed as long as the nonIV
  807. /// operands dominate InsertPos.
  808. ///
  809. /// If allowScale is not set, ensure that a GEP increment conforms to one of the
  810. /// simple patterns generated by getAddRecExprPHILiterally and
  811. /// expandAddtoGEP. If the pattern isn't recognized, return NULL.
  812. Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
  813. Instruction *InsertPos,
  814. bool allowScale) {
  815. if (IncV == InsertPos)
  816. return NULL;
  817. switch (IncV->getOpcode()) {
  818. default:
  819. return NULL;
  820. // Check for a simple Add/Sub or GEP of a loop invariant step.
  821. case Instruction::Add:
  822. case Instruction::Sub: {
  823. Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
  824. if (!OInst || SE.DT->dominates(OInst, InsertPos))
  825. return dyn_cast<Instruction>(IncV->getOperand(0));
  826. return NULL;
  827. }
  828. case Instruction::BitCast:
  829. return dyn_cast<Instruction>(IncV->getOperand(0));
  830. case Instruction::GetElementPtr:
  831. for (Instruction::op_iterator I = IncV->op_begin()+1, E = IncV->op_end();
  832. I != E; ++I) {
  833. if (isa<Constant>(*I))
  834. continue;
  835. if (Instruction *OInst = dyn_cast<Instruction>(*I)) {
  836. if (!SE.DT->dominates(OInst, InsertPos))
  837. return NULL;
  838. }
  839. if (allowScale) {
  840. // allow any kind of GEP as long as it can be hoisted.
  841. continue;
  842. }
  843. // This must be a pointer addition of constants (pretty), which is already
  844. // handled, or some number of address-size elements (ugly). Ugly geps
  845. // have 2 operands. i1* is used by the expander to represent an
  846. // address-size element.
  847. if (IncV->getNumOperands() != 2)
  848. return NULL;
  849. unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
  850. if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS)
  851. && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
  852. return NULL;
  853. break;
  854. }
  855. return dyn_cast<Instruction>(IncV->getOperand(0));
  856. }
  857. }
  858. /// hoistStep - Attempt to hoist a simple IV increment above InsertPos to make
  859. /// it available to other uses in this loop. Recursively hoist any operands,
  860. /// until we reach a value that dominates InsertPos.
  861. bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) {
  862. if (SE.DT->dominates(IncV, InsertPos))
  863. return true;
  864. // InsertPos must itself dominate IncV so that IncV's new position satisfies
  865. // its existing users.
  866. if (isa<PHINode>(InsertPos)
  867. || !SE.DT->dominates(InsertPos->getParent(), IncV->getParent()))
  868. return false;
  869. // Check that the chain of IV operands leading back to Phi can be hoisted.
  870. SmallVector<Instruction*, 4> IVIncs;
  871. for(;;) {
  872. Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
  873. if (!Oper)
  874. return false;
  875. // IncV is safe to hoist.
  876. IVIncs.push_back(IncV);
  877. IncV = Oper;
  878. if (SE.DT->dominates(IncV, InsertPos))
  879. break;
  880. }
  881. for (SmallVectorImpl<Instruction*>::reverse_iterator I = IVIncs.rbegin(),
  882. E = IVIncs.rend(); I != E; ++I) {
  883. (*I)->moveBefore(InsertPos);
  884. }
  885. return true;
  886. }
  887. /// Determine if this cyclic phi is in a form that would have been generated by
  888. /// LSR. We don't care if the phi was actually expanded in this pass, as long
  889. /// as it is in a low-cost form, for example, no implied multiplication. This
  890. /// should match any patterns generated by getAddRecExprPHILiterally and
  891. /// expandAddtoGEP.
  892. bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
  893. const Loop *L) {
  894. for(Instruction *IVOper = IncV;
  895. (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(),
  896. /*allowScale=*/false));) {
  897. if (IVOper == PN)
  898. return true;
  899. }
  900. return false;
  901. }
  902. /// expandIVInc - Expand an IV increment at Builder's current InsertPos.
  903. /// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
  904. /// need to materialize IV increments elsewhere to handle difficult situations.
  905. Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
  906. Type *ExpandTy, Type *IntTy,
  907. bool useSubtract) {
  908. Value *IncV;
  909. // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
  910. if (ExpandTy->isPointerTy()) {
  911. PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
  912. // If the step isn't constant, don't use an implicitly scaled GEP, because
  913. // that would require a multiply inside the loop.
  914. if (!isa<ConstantInt>(StepV))
  915. GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
  916. GEPPtrTy->getAddressSpace());
  917. const SCEV *const StepArray[1] = { SE.getSCEV(StepV) };
  918. IncV = expandAddToGEP(StepArray, StepArray+1, GEPPtrTy, IntTy, PN);
  919. if (IncV->getType() != PN->getType()) {
  920. IncV = Builder.CreateBitCast(IncV, PN->getType());
  921. rememberInstruction(IncV);
  922. }
  923. } else {
  924. IncV = useSubtract ?
  925. Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
  926. Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
  927. rememberInstruction(IncV);
  928. }
  929. return IncV;
  930. }
  931. /// \brief Hoist the addrec instruction chain rooted in the loop phi above the
  932. /// position. This routine assumes that this is possible (has been checked).
  933. static void hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist,
  934. Instruction *Pos, PHINode *LoopPhi) {
  935. do {
  936. if (DT->dominates(InstToHoist, Pos))
  937. break;
  938. // Make sure the increment is where we want it. But don't move it
  939. // down past a potential existing post-inc user.
  940. InstToHoist->moveBefore(Pos);
  941. Pos = InstToHoist;
  942. InstToHoist = cast<Instruction>(InstToHoist->getOperand(0));
  943. } while (InstToHoist != LoopPhi);
  944. }
  945. /// \brief Check whether we can cheaply express the requested SCEV in terms of
  946. /// the available PHI SCEV by truncation and/or invertion of the step.
  947. static bool canBeCheaplyTransformed(ScalarEvolution &SE,
  948. const SCEVAddRecExpr *Phi,
  949. const SCEVAddRecExpr *Requested,
  950. bool &InvertStep) {
  951. Type *PhiTy = SE.getEffectiveSCEVType(Phi->getType());
  952. Type *RequestedTy = SE.getEffectiveSCEVType(Requested->getType());
  953. if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
  954. return false;
  955. // Try truncate it if necessary.
  956. Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy));
  957. if (!Phi)
  958. return false;
  959. // Check whether truncation will help.
  960. if (Phi == Requested) {
  961. InvertStep = false;
  962. return true;
  963. }
  964. // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
  965. if (SE.getAddExpr(Requested->getStart(),
  966. SE.getNegativeSCEV(Requested)) == Phi) {
  967. InvertStep = true;
  968. return true;
  969. }
  970. return false;
  971. }
  972. /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
  973. /// the base addrec, which is the addrec without any non-loop-dominating
  974. /// values, and return the PHI.
  975. PHINode *
  976. SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
  977. const Loop *L,
  978. Type *ExpandTy,
  979. Type *IntTy,
  980. Type *&TruncTy,
  981. bool &InvertStep) {
  982. assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position");
  983. // Reuse a previously-inserted PHI, if present.
  984. BasicBlock *LatchBlock = L->getLoopLatch();
  985. if (LatchBlock) {
  986. PHINode *AddRecPhiMatch = 0;
  987. Instruction *IncV = 0;
  988. TruncTy = 0;
  989. InvertStep = false;
  990. // Only try partially matching scevs that need truncation and/or
  991. // step-inversion if we know this loop is outside the current loop.
  992. bool TryNonMatchingSCEV = IVIncInsertLoop &&
  993. SE.DT->properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());
  994. for (BasicBlock::iterator I = L->getHeader()->begin();
  995. PHINode *PN = dyn_cast<PHINode>(I); ++I) {
  996. if (!SE.isSCEVable(PN->getType()))
  997. continue;
  998. const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PN));
  999. if (!PhiSCEV)
  1000. continue;
  1001. bool IsMatchingSCEV = PhiSCEV == Normalized;
  1002. // We only handle truncation and inversion of phi recurrences for the
  1003. // expanded expression if the expanded expression's loop dominates the
  1004. // loop we insert to. Check now, so we can bail out early.
  1005. if (!IsMatchingSCEV && !TryNonMatchingSCEV)
  1006. continue;
  1007. Instruction *TempIncV =
  1008. cast<Instruction>(PN->getIncomingValueForBlock(LatchBlock));
  1009. // Check whether we can reuse this PHI node.
  1010. if (LSRMode) {
  1011. if (!isExpandedAddRecExprPHI(PN, TempIncV, L))
  1012. continue;
  1013. if (L == IVIncInsertLoop && !hoistIVInc(TempIncV, IVIncInsertPos))
  1014. continue;
  1015. } else {
  1016. if (!isNormalAddRecExprPHI(PN, TempIncV, L))
  1017. continue;
  1018. }
  1019. // Stop if we have found an exact match SCEV.
  1020. if (IsMatchingSCEV) {
  1021. IncV = TempIncV;
  1022. TruncTy = 0;
  1023. InvertStep = false;
  1024. AddRecPhiMatch = PN;
  1025. break;
  1026. }
  1027. // Try whether the phi can be translated into the requested form
  1028. // (truncated and/or offset by a constant).
  1029. if ((!TruncTy || InvertStep) &&
  1030. canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
  1031. // Record the phi node. But don't stop we might find an exact match
  1032. // later.
  1033. AddRecPhiMatch = PN;
  1034. IncV = TempIncV;
  1035. TruncTy = SE.getEffectiveSCEVType(Normalized->getType());
  1036. }
  1037. }
  1038. if (AddRecPhiMatch) {
  1039. // Potentially, move the increment. We have made sure in
  1040. // isExpandedAddRecExprPHI or hoistIVInc that this is possible.
  1041. if (L == IVIncInsertLoop)
  1042. hoistBeforePos(SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch);
  1043. // Ok, the add recurrence looks usable.
  1044. // Remember this PHI, even in post-inc mode.
  1045. InsertedValues.insert(AddRecPhiMatch);
  1046. // Remember the increment.
  1047. rememberInstruction(IncV);
  1048. return AddRecPhiMatch;
  1049. }
  1050. }
  1051. // Save the original insertion point so we can restore it when we're done.
  1052. BuilderType::InsertPointGuard Guard(Builder);
  1053. // Another AddRec may need to be recursively expanded below. For example, if
  1054. // this AddRec is quadratic, the StepV may itself be an AddRec in this
  1055. // loop. Remove this loop from the PostIncLoops set before expanding such
  1056. // AddRecs. Otherwise, we cannot find a valid position for the step
  1057. // (i.e. StepV can never dominate its loop header). Ideally, we could do
  1058. // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
  1059. // so it's not worth implementing SmallPtrSet::swap.
  1060. PostIncLoopSet SavedPostIncLoops = PostIncLoops;
  1061. PostIncLoops.clear();
  1062. // Expand code for the start value.
  1063. Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
  1064. L->getHeader()->begin());
  1065. // StartV must be hoisted into L's preheader to dominate the new phi.
  1066. assert(!isa<Instruction>(StartV) ||
  1067. SE.DT->properlyDominates(cast<Instruction>(StartV)->getParent(),
  1068. L->getHeader()));
  1069. // Expand code for the step value. Do this before creating the PHI so that PHI
  1070. // reuse code doesn't see an incomplete PHI.
  1071. const SCEV *Step = Normalized->getStepRecurrence(SE);
  1072. // If the stride is negative, insert a sub instead of an add for the increment
  1073. // (unless it's a constant, because subtracts of constants are canonicalized
  1074. // to adds).
  1075. bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
  1076. if (useSubtract)
  1077. Step = SE.getNegativeSCEV(Step);
  1078. // Expand the step somewhere that dominates the loop header.
  1079. Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
  1080. // Create the PHI.
  1081. BasicBlock *Header = L->getHeader();
  1082. Builder.SetInsertPoint(Header, Header->begin());
  1083. pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
  1084. PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE),
  1085. Twine(IVName) + ".iv");
  1086. rememberInstruction(PN);
  1087. // Create the step instructions and populate the PHI.
  1088. for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
  1089. BasicBlock *Pred = *HPI;
  1090. // Add a start value.
  1091. if (!L->contains(Pred)) {
  1092. PN->addIncoming(StartV, Pred);
  1093. continue;
  1094. }
  1095. // Create a step value and add it to the PHI.
  1096. // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
  1097. // instructions at IVIncInsertPos.
  1098. Instruction *InsertPos = L == IVIncInsertLoop ?
  1099. IVIncInsertPos : Pred->getTerminator();
  1100. Builder.SetInsertPoint(InsertPos);
  1101. Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
  1102. if (isa<OverflowingBinaryOperator>(IncV)) {
  1103. if (Normalized->getNoWrapFlags(SCEV::FlagNUW))
  1104. cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap();
  1105. if (Normalized->getNoWrapFlags(SCEV::FlagNSW))
  1106. cast<BinaryOperator>(IncV)->setHasNoSignedWrap();
  1107. }
  1108. PN->addIncoming(IncV, Pred);
  1109. }
  1110. // After expanding subexpressions, restore the PostIncLoops set so the caller
  1111. // can ensure that IVIncrement dominates the current uses.
  1112. PostIncLoops = SavedPostIncLoops;
  1113. // Remember this PHI, even in post-inc mode.
  1114. InsertedValues.insert(PN);
  1115. return PN;
  1116. }
  1117. Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
  1118. Type *STy = S->getType();
  1119. Type *IntTy = SE.getEffectiveSCEVType(STy);
  1120. const Loop *L = S->getLoop();
  1121. // Determine a normalized form of this expression, which is the expression
  1122. // before any post-inc adjustment is made.
  1123. const SCEVAddRecExpr *Normalized = S;
  1124. if (PostIncLoops.count(L)) {
  1125. PostIncLoopSet Loops;
  1126. Loops.insert(L);
  1127. Normalized =
  1128. cast<SCEVAddRecExpr>(TransformForPostIncUse(Normalize, S, 0, 0,
  1129. Loops, SE, *SE.DT));
  1130. }
  1131. // Strip off any non-loop-dominating component from the addrec start.
  1132. const SCEV *Start = Normalized->getStart();
  1133. const SCEV *PostLoopOffset = 0;
  1134. if (!SE.properlyDominates(Start, L->getHeader())) {
  1135. PostLoopOffset = Start;
  1136. Start = SE.getConstant(Normalized->getType(), 0);
  1137. Normalized = cast<SCEVAddRecExpr>(
  1138. SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE),
  1139. Normalized->getLoop(),
  1140. Normalized->getNoWrapFlags(SCEV::FlagNW)));
  1141. }
  1142. // Strip off any non-loop-dominating component from the addrec step.
  1143. const SCEV *Step = Normalized->getStepRecurrence(SE);
  1144. const SCEV *PostLoopScale = 0;
  1145. if (!SE.dominates(Step, L->getHeader())) {
  1146. PostLoopScale = Step;
  1147. Step = SE.getConstant(Normalized->getType(), 1);
  1148. Normalized =
  1149. cast<SCEVAddRecExpr>(SE.getAddRecExpr(
  1150. Start, Step, Normalized->getLoop(),
  1151. Normalized->getNoWrapFlags(SCEV::FlagNW)));
  1152. }
  1153. // Expand the core addrec. If we need post-loop scaling, force it to
  1154. // expand to an integer type to avoid the need for additional casting.
  1155. Type *ExpandTy = PostLoopScale ? IntTy : STy;
  1156. // In some cases, we decide to reuse an existing phi node but need to truncate
  1157. // it and/or invert the step.
  1158. Type *TruncTy = 0;
  1159. bool InvertStep = false;
  1160. PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy,
  1161. TruncTy, InvertStep);
  1162. // Accommodate post-inc mode, if necessary.
  1163. Value *Result;
  1164. if (!PostIncLoops.count(L))
  1165. Result = PN;
  1166. else {
  1167. // In PostInc mode, use the post-incremented value.
  1168. BasicBlock *LatchBlock = L->getLoopLatch();
  1169. assert(LatchBlock && "PostInc mode requires a unique loop latch!");
  1170. Result = PN->getIncomingValueForBlock(LatchBlock);
  1171. // For an expansion to use the postinc form, the client must call
  1172. // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
  1173. // or dominated by IVIncInsertPos.
  1174. if (isa<Instruction>(Result)
  1175. && !SE.DT->dominates(cast<Instruction>(Result),
  1176. Builder.GetInsertPoint())) {
  1177. // The induction variable's postinc expansion does not dominate this use.
  1178. // IVUsers tries to prevent this case, so it is rare. However, it can
  1179. // happen when an IVUser outside the loop is not dominated by the latch
  1180. // block. Adjusting IVIncInsertPos before expansion begins cannot handle
  1181. // all cases. Consider a phi outide whose operand is replaced during
  1182. // expansion with the value of the postinc user. Without fundamentally
  1183. // changing the way postinc users are tracked, the only remedy is
  1184. // inserting an extra IV increment. StepV might fold into PostLoopOffset,
  1185. // but hopefully expandCodeFor handles that.
  1186. bool useSubtract =
  1187. !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
  1188. if (useSubtract)
  1189. Step = SE.getNegativeSCEV(Step);
  1190. Value *StepV;
  1191. {
  1192. // Expand the step somewhere that dominates the loop header.
  1193. BuilderType::InsertPointGuard Guard(Builder);
  1194. StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin());
  1195. }
  1196. Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
  1197. }
  1198. }
  1199. // We have decided to reuse an induction variable of a dominating loop. Apply
  1200. // truncation and/or invertion of the step.
  1201. if (TruncTy) {
  1202. Type *ResTy = Result->getType();
  1203. // Normalize the result type.
  1204. if (ResTy != SE.getEffectiveSCEVType(ResTy))
  1205. Result = InsertNoopCastOfTo(Result, SE.getEffectiveSCEVType(ResTy));
  1206. // Truncate the result.
  1207. if (TruncTy != Result->getType()) {
  1208. Result = Builder.CreateTrunc(Result, TruncTy);
  1209. rememberInstruction(Result);
  1210. }
  1211. // Invert the result.
  1212. if (InvertStep) {
  1213. Result = Builder.CreateSub(expandCodeFor(Normalized->getStart(), TruncTy),
  1214. Result);
  1215. rememberInstruction(Result);
  1216. }
  1217. }
  1218. // Re-apply any non-loop-dominating scale.
  1219. if (PostLoopScale) {
  1220. assert(S->isAffine() && "Can't linearly scale non-affine recurrences.");
  1221. Result = InsertNoopCastOfTo(Result, IntTy);
  1222. Result = Builder.CreateMul(Result,
  1223. expandCodeFor(PostLoopScale, IntTy));
  1224. rememberInstruction(Result);
  1225. }
  1226. // Re-apply any non-loop-dominating offset.
  1227. if (PostLoopOffset) {
  1228. if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
  1229. const SCEV *const OffsetArray[1] = { PostLoopOffset };
  1230. Result = expandAddToGEP(OffsetArray, OffsetArray+1, PTy, IntTy, Result);
  1231. } else {
  1232. Result = InsertNoopCastOfTo(Result, IntTy);
  1233. Result = Builder.CreateAdd(Result,
  1234. expandCodeFor(PostLoopOffset, IntTy));
  1235. rememberInstruction(Result);
  1236. }
  1237. }
  1238. return Result;
  1239. }
  1240. Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
  1241. if (!CanonicalMode) return expandAddRecExprLiterally(S);
  1242. Type *Ty = SE.getEffectiveSCEVType(S->getType());
  1243. const Loop *L = S->getLoop();
  1244. // First check for an existing canonical IV in a suitable type.
  1245. PHINode *CanonicalIV = 0;
  1246. if (PHINode *PN = L->getCanonicalInductionVariable())
  1247. if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
  1248. CanonicalIV = PN;
  1249. // Rewrite an AddRec in terms of the canonical induction variable, if
  1250. // its type is more narrow.
  1251. if (CanonicalIV &&
  1252. SE.getTypeSizeInBits(CanonicalIV->getType()) >
  1253. SE.getTypeSizeInBits(Ty)) {
  1254. SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
  1255. for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
  1256. NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType());
  1257. Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
  1258. S->getNoWrapFlags(SCEV::FlagNW)));
  1259. BasicBlock::iterator NewInsertPt =
  1260. std::next(BasicBlock::iterator(cast<Instruction>(V)));
  1261. BuilderType::InsertPointGuard Guard(Builder);
  1262. while (isa<PHINode>(NewInsertPt) || isa<DbgInfoIntrinsic>(NewInsertPt) ||
  1263. isa<LandingPadInst>(NewInsertPt))
  1264. ++NewInsertPt;
  1265. V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
  1266. NewInsertPt);
  1267. return V;
  1268. }
  1269. // {X,+,F} --> X + {0,+,F}
  1270. if (!S->getStart()->isZero()) {
  1271. SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end());
  1272. NewOps[0] = SE.getConstant(Ty, 0);
  1273. const SCEV *Rest = SE.getAddRecExpr(NewOps, L,
  1274. S->getNoWrapFlags(SCEV::FlagNW));
  1275. // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
  1276. // comments on expandAddToGEP for details.
  1277. const SCEV *Base = S->getStart();
  1278. const SCEV *RestArray[1] = { Rest };
  1279. // Dig into the expression to find the pointer base for a GEP.
  1280. ExposePointerBase(Base, RestArray[0], SE);
  1281. // If we found a pointer, expand the AddRec with a GEP.
  1282. if (PointerType *PTy = dyn_cast<PointerType>(Base->getType())) {
  1283. // Make sure the Base isn't something exotic, such as a multiplied
  1284. // or divided pointer value. In those cases, the result type isn't
  1285. // actually a pointer type.
  1286. if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) {
  1287. Value *StartV = expand(Base);
  1288. assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
  1289. return expandAddToGEP(RestArray, RestArray+1, PTy, Ty, StartV);
  1290. }
  1291. }
  1292. // Just do a normal add. Pre-expand the operands to suppress folding.
  1293. return expand(SE.getAddExpr(SE.getUnknown(expand(S->getStart())),
  1294. SE.getUnknown(expand(Rest))));
  1295. }
  1296. // If we don't yet have a canonical IV, create one.
  1297. if (!CanonicalIV) {
  1298. // Create and insert the PHI node for the induction variable in the
  1299. // specified loop.
  1300. BasicBlock *Header = L->getHeader();
  1301. pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
  1302. CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar",
  1303. Header->begin());
  1304. rememberInstruction(CanonicalIV);
  1305. SmallSet<BasicBlock *, 4> PredSeen;
  1306. Constant *One = ConstantInt::get(Ty, 1);
  1307. for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
  1308. BasicBlock *HP = *HPI;
  1309. if (!PredSeen.insert(HP))
  1310. continue;
  1311. if (L->contains(HP)) {
  1312. // Insert a unit add instruction right before the terminator
  1313. // corresponding to the back-edge.
  1314. Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
  1315. "indvar.next",
  1316. HP->getTerminator());
  1317. Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
  1318. rememberInstruction(Add);
  1319. CanonicalIV->addIncoming(Add, HP);
  1320. } else {
  1321. CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
  1322. }
  1323. }
  1324. }
  1325. // {0,+,1} --> Insert a canonical induction variable into the loop!
  1326. if (S->isAffine() && S->getOperand(1)->isOne()) {
  1327. assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
  1328. "IVs with types different from the canonical IV should "
  1329. "already have been handled!");
  1330. return CanonicalIV;
  1331. }
  1332. // {0,+,F} --> {0,+,1} * F
  1333. // If this is a simple linear addrec, emit it now as a special case.
  1334. if (S->isAffine()) // {0,+,F} --> i*F
  1335. return
  1336. expand(SE.getTruncateOrNoop(
  1337. SE.getMulExpr(SE.getUnknown(CanonicalIV),
  1338. SE.getNoopOrAnyExtend(S->getOperand(1),
  1339. CanonicalIV->getType())),
  1340. Ty));
  1341. // If this is a chain of recurrences, turn it into a closed form, using the
  1342. // folders, then expandCodeFor the closed form. This allows the folders to
  1343. // simplify the expression without having to build a bunch of special code
  1344. // into this folder.
  1345. const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV.
  1346. // Promote S up to the canonical IV type, if the cast is foldable.
  1347. const SCEV *NewS = S;
  1348. const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
  1349. if (isa<SCEVAddRecExpr>(Ext))
  1350. NewS = Ext;
  1351. const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);
  1352. //cerr << "Evaluated: " << *this << "\n to: " << *V << "\n";
  1353. // Truncate the result down to the original type, if needed.
  1354. const SCEV *T = SE.getTruncateOrNoop(V, Ty);
  1355. return expand(T);
  1356. }
  1357. Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
  1358. Type *Ty = SE.getEffectiveSCEVType(S->getType());
  1359. Value *V = expandCodeFor(S->getOperand(),
  1360. SE.getEffectiveSCEVType(S->getOperand()->getType()));
  1361. Value *I = Builder.CreateTrunc(V, Ty);
  1362. rememberInstruction(I);
  1363. return I;
  1364. }
  1365. Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
  1366. Type *Ty = SE.getEffectiveSCEVType(S->getType());
  1367. Value *V = expandCodeFor(S->getOperand(),
  1368. SE.getEffectiveSCEVType(S->getOperand()->getType()));
  1369. Value *I = Builder.CreateZExt(V, Ty);
  1370. rememberInstruction(I);
  1371. return I;
  1372. }
  1373. Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
  1374. Type *Ty = SE.getEffectiveSCEVType(S->getType());
  1375. Value *V = expandCodeFor(S->getOperand(),
  1376. SE.getEffectiveSCEVType(S->getOperand()->getType()));
  1377. Value *I = Builder.CreateSExt(V, Ty);
  1378. rememberInstruction(I);
  1379. return I;
  1380. }
  1381. Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
  1382. Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
  1383. Type *Ty = LHS->getType();
  1384. for (int i = S->getNumOperands()-2; i >= 0; --i) {
  1385. // In the case of mixed integer and pointer types, do the
  1386. // rest of the comparisons as integer.
  1387. if (S->getOperand(i)->getType() != Ty) {
  1388. Ty = SE.getEffectiveSCEVType(Ty);
  1389. LHS = InsertNoopCastOfTo(LHS, Ty);
  1390. }
  1391. Value *RHS = expandCodeFor(S->getOperand(i), Ty);
  1392. Value *ICmp = Builder.CreateICmpSGT(LHS, RHS);
  1393. rememberInstruction(ICmp);
  1394. Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax");
  1395. rememberInstruction(Sel);
  1396. LHS = Sel;
  1397. }
  1398. // In the case of mixed integer and pointer types, cast the
  1399. // final result back to the pointer type.
  1400. if (LHS->getType() != S->getType())
  1401. LHS = InsertNoopCastOfTo(LHS, S->getType());
  1402. return LHS;
  1403. }
  1404. Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
  1405. Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
  1406. Type *Ty = LHS->getType();
  1407. for (int i = S->getNumOperands()-2; i >= 0; --i) {
  1408. // In the case of mixed integer and pointer types, do the
  1409. // rest of the comparisons as integer.
  1410. if (S->getOperand(i)->getType() != Ty) {
  1411. Ty = SE.getEffectiveSCEVType(Ty);
  1412. LHS = InsertNoopCastOfTo(LHS, Ty);
  1413. }
  1414. Value *RHS = expandCodeFor(S->getOperand(i), Ty);
  1415. Value *ICmp = Builder.CreateICmpUGT(LHS, RHS);
  1416. rememberInstruction(ICmp);
  1417. Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax");
  1418. rememberInstruction(Sel);
  1419. LHS = Sel;
  1420. }
  1421. // In the case of mixed integer and pointer types, cast the
  1422. // final result back to the pointer type.
  1423. if (LHS->getType() != S->getType())
  1424. LHS = InsertNoopCastOfTo(LHS, S->getType());
  1425. return LHS;
  1426. }
  1427. Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
  1428. Instruction *IP) {
  1429. Builder.SetInsertPoint(IP->getParent(), IP);
  1430. return expandCodeFor(SH, Ty);
  1431. }
  1432. Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
  1433. // Expand the code for this SCEV.
  1434. Value *V = expand(SH);
  1435. if (Ty) {
  1436. assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
  1437. "non-trivial casts should be done with the SCEVs directly!");
  1438. V = InsertNoopCastOfTo(V, Ty);
  1439. }
  1440. return V;
  1441. }
  1442. Value *SCEVExpander::expand(const SCEV *S) {
  1443. // Compute an insertion point for this SCEV object. Hoist the instructions
  1444. // as far out in the loop nest as possible.
  1445. Instruction *InsertPt = Builder.GetInsertPoint();
  1446. for (Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock()); ;
  1447. L = L->getParentLoop())
  1448. if (SE.isLoopInvariant(S, L)) {
  1449. if (!L) break;
  1450. if (BasicBlock *Preheader = L->getLoopPreheader())
  1451. InsertPt = Preheader->getTerminator();
  1452. else {
  1453. // LSR sets the insertion point for AddRec start/step values to the
  1454. // block start to simplify value reuse, even though it's an invalid
  1455. // position. SCEVExpander must correct for this in all cases.
  1456. InsertPt = L->getHeader()->getFirstInsertionPt();
  1457. }
  1458. } else {
  1459. // If the SCEV is computable at this level, insert it into the header
  1460. // after the PHIs (and after any other instructions that we've inserted
  1461. // there) so that it is guaranteed to dominate any user inside the loop.
  1462. if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
  1463. InsertPt = L->getHeader()->getFirstInsertionPt();
  1464. while (InsertPt != Builder.GetInsertPoint()
  1465. && (isInsertedInstruction(InsertPt)
  1466. || isa<DbgInfoIntrinsic>(InsertPt))) {
  1467. InsertPt = std::next(BasicBlock::iterator(InsertPt));
  1468. }
  1469. break;
  1470. }
  1471. // Check to see if we already expanded this here.
  1472. std::map<std::pair<const SCEV *, Instruction *>, TrackingVH<Value> >::iterator
  1473. I = InsertedExpressions.find(std::make_pair(S, InsertPt));
  1474. if (I != InsertedExpressions.end())
  1475. return I->second;
  1476. BuilderType::InsertPointGuard Guard(Builder);
  1477. Builder.SetInsertPoint(InsertPt->getParent(), InsertPt);
  1478. // Expand the expression into instructions.
  1479. Value *V = visit(S);
  1480. // Remember the expanded value for this SCEV at this location.
  1481. //
  1482. // This is independent of PostIncLoops. The mapped value simply materializes
  1483. // the expression at this insertion point. If the mapped value happened to be
  1484. // a postinc expansion, it could be reused by a non-postinc user, but only if
  1485. // its insertion point was already at the head of the loop.
  1486. InsertedExpressions[std::make_pair(S, InsertPt)] = V;
  1487. return V;
  1488. }
  1489. void SCEVExpander::rememberInstruction(Value *I) {
  1490. if (!PostIncLoops.empty())
  1491. InsertedPostIncValues.insert(I);
  1492. else
  1493. InsertedValues.insert(I);
  1494. }
  1495. /// getOrInsertCanonicalInductionVariable - This method returns the
  1496. /// canonical induction variable of the specified type for the specified
  1497. /// loop (inserting one if there is none). A canonical induction variable
  1498. /// starts at zero and steps by one on each iteration.
  1499. PHINode *
  1500. SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
  1501. Type *Ty) {
  1502. assert(Ty->isIntegerTy() && "Can only insert integer induction variables!");
  1503. // Build a SCEV for {0,+,1}<L>.
  1504. // Conservatively use FlagAnyWrap for now.
  1505. const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0),
  1506. SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap);
  1507. // Emit code for it.
  1508. BuilderType::InsertPointGuard Guard(Builder);
  1509. PHINode *V = cast<PHINode>(expandCodeFor(H, 0, L->getHeader()->begin()));
  1510. return V;
  1511. }
  1512. /// Sort values by integer width for replaceCongruentIVs.
  1513. static bool width_descending(Value *lhs, Value *rhs) {
  1514. // Put pointers at the back and make sure pointer < pointer = false.
  1515. if (!lhs->getType()->isIntegerTy() || !rhs->getType()->isIntegerTy())
  1516. return rhs->getType()->isIntegerTy() && !lhs->getType()->isIntegerTy();
  1517. return rhs->getType()->getPrimitiveSizeInBits()
  1518. < lhs->getType()->getPrimitiveSizeInBits();
  1519. }
  1520. /// replaceCongruentIVs - Check for congruent phis in this loop header and
  1521. /// replace them with their most canonical representative. Return the number of
  1522. /// phis eliminated.
  1523. ///
  1524. /// This does not depend on any SCEVExpander state but should be used in
  1525. /// the same context that SCEVExpander is used.
  1526. unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
  1527. SmallVectorImpl<WeakVH> &DeadInsts,
  1528. const TargetTransformInfo *TTI) {
  1529. // Find integer phis in order of increasing width.
  1530. SmallVector<PHINode*, 8> Phis;
  1531. for (BasicBlock::iterator I = L->getHeader()->begin();
  1532. PHINode *Phi = dyn_cast<PHINode>(I); ++I) {
  1533. Phis.push_back(Phi);
  1534. }
  1535. if (TTI)
  1536. std::sort(Phis.begin(), Phis.end(), width_descending);
  1537. unsigned NumElim = 0;
  1538. DenseMap<const SCEV *, PHINode *> ExprToIVMap;
  1539. // Process phis from wide to narrow. Mapping wide phis to the their truncation
  1540. // so narrow phis can reuse them.
  1541. for (SmallVectorImpl<PHINode*>::const_iterator PIter = Phis.begin(),
  1542. PEnd = Phis.end(); PIter != PEnd; ++PIter) {
  1543. PHINode *Phi = *PIter;
  1544. // Fold constant phis. They may be congruent to other constant phis and
  1545. // would confuse the logic below that expects proper IVs.
  1546. if (Value *V = Phi->hasConstantValue()) {
  1547. Phi->replaceAllUsesWith(V);
  1548. DeadInsts.push_back(Phi);
  1549. ++NumElim;
  1550. DEBUG_WITH_TYPE(DebugType, dbgs()
  1551. << "INDVARS: Eliminated constant iv: " << *Phi << '\n');
  1552. continue;
  1553. }
  1554. if (!SE.isSCEVable(Phi->getType()))
  1555. continue;
  1556. PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
  1557. if (!OrigPhiRef) {
  1558. OrigPhiRef = Phi;
  1559. if (Phi->getType()->isIntegerTy() && TTI
  1560. && TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
  1561. // This phi can be freely truncated to the narrowest phi type. Map the
  1562. // truncated expression to it so it will be reused for narrow types.
  1563. const SCEV *TruncExpr =
  1564. SE.getTruncateExpr(SE.getSCEV(Phi), Phis.back()->getType());
  1565. ExprToIVMap[TruncExpr] = Phi;
  1566. }
  1567. continue;
  1568. }
  1569. // Replacing a pointer phi with an integer phi or vice-versa doesn't make
  1570. // sense.
  1571. if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
  1572. continue;
  1573. if (BasicBlock *LatchBlock = L->getLoopLatch()) {
  1574. Instruction *OrigInc =
  1575. cast<Instruction>(OrigPhiRef->getIncomingValueForBlock(LatchBlock));
  1576. Instruction *IsomorphicInc =
  1577. cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));
  1578. // If this phi has the same width but is more canonical, replace the
  1579. // original with it. As part of the "more canonical" determination,
  1580. // respect a prior decision to use an IV chain.
  1581. if (OrigPhiRef->getType() == Phi->getType()
  1582. && !(ChainedPhis.count(Phi)
  1583. || isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L))
  1584. && (ChainedPhis.count(Phi)
  1585. || isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) {
  1586. std::swap(OrigPhiRef, Phi);
  1587. std::swap(OrigInc, IsomorphicInc);
  1588. }
  1589. // Replacing the congruent phi is sufficient because acyclic redundancy
  1590. // elimination, CSE/GVN, should handle the rest. However, once SCEV proves
  1591. // that a phi is congruent, it's often the head of an IV user cycle that
  1592. // is isomorphic with the original phi. It's worth eagerly cleaning up the
  1593. // common case of a single IV increment so that DeleteDeadPHIs can remove
  1594. // cycles that had postinc uses.
  1595. const SCEV *TruncExpr = SE.getTruncateOrNoop(SE.getSCEV(OrigInc),
  1596. IsomorphicInc->getType());
  1597. if (OrigInc != IsomorphicInc
  1598. && TruncExpr == SE.getSCEV(IsomorphicInc)
  1599. && ((isa<PHINode>(OrigInc) && isa<PHINode>(IsomorphicInc))
  1600. || hoistIVInc(OrigInc, IsomorphicInc))) {
  1601. DEBUG_WITH_TYPE(DebugType, dbgs()
  1602. << "INDVARS: Eliminated congruent iv.inc: "
  1603. << *IsomorphicInc << '\n');
  1604. Value *NewInc = OrigInc;
  1605. if (OrigInc->getType() != IsomorphicInc->getType()) {
  1606. Instruction *IP = isa<PHINode>(OrigInc)
  1607. ? (Instruction*)L->getHeader()->getFirstInsertionPt()
  1608. : OrigInc->getNextNode();
  1609. IRBuilder<> Builder(IP);
  1610. Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
  1611. NewInc = Builder.
  1612. CreateTruncOrBitCast(OrigInc, IsomorphicInc->getType(), IVName);
  1613. }
  1614. IsomorphicInc->replaceAllUsesWith(NewInc);
  1615. DeadInsts.push_back(IsomorphicInc);
  1616. }
  1617. }
  1618. DEBUG_WITH_TYPE(DebugType, dbgs()
  1619. << "INDVARS: Eliminated congruent iv: " << *Phi << '\n');
  1620. ++NumElim;
  1621. Value *NewIV = OrigPhiRef;
  1622. if (OrigPhiRef->getType() != Phi->getType()) {
  1623. IRBuilder<> Builder(L->getHeader()->getFirstInsertionPt());
  1624. Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
  1625. NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
  1626. }
  1627. Phi->replaceAllUsesWith(NewIV);
  1628. DeadInsts.push_back(Phi);
  1629. }
  1630. return NumElim;
  1631. }
  1632. namespace {
  1633. // Search for a SCEV subexpression that is not safe to expand. Any expression
  1634. // that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
  1635. // UDiv expressions. We don't know if the UDiv is derived from an IR divide
  1636. // instruction, but the important thing is that we prove the denominator is
  1637. // nonzero before expansion.
  1638. //
  1639. // IVUsers already checks that IV-derived expressions are safe. So this check is
  1640. // only needed when the expression includes some subexpression that is not IV
  1641. // derived.
  1642. //
  1643. // Currently, we only allow division by a nonzero constant here. If this is
  1644. // inadequate, we could easily allow division by SCEVUnknown by using
  1645. // ValueTracking to check isKnownNonZero().
  1646. //
  1647. // We cannot generally expand recurrences unless the step dominates the loop
  1648. // header. The expander handles the special case of affine recurrences by
  1649. // scaling the recurrence outside the loop, but this technique isn't generally
  1650. // applicable. Expanding a nested recurrence outside a loop requires computing
  1651. // binomial coefficients. This could be done, but the recurrence has to be in a
  1652. // perfectly reduced form, which can't be guaranteed.
  1653. struct SCEVFindUnsafe {
  1654. ScalarEvolution &SE;
  1655. bool IsUnsafe;
  1656. SCEVFindUnsafe(ScalarEvolution &se): SE(se), IsUnsafe(false) {}
  1657. bool follow(const SCEV *S) {
  1658. if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
  1659. const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
  1660. if (!SC || SC->getValue()->isZero()) {
  1661. IsUnsafe = true;
  1662. return false;
  1663. }
  1664. }
  1665. if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
  1666. const SCEV *Step = AR->getStepRecurrence(SE);
  1667. if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) {
  1668. IsUnsafe = true;
  1669. return false;
  1670. }
  1671. }
  1672. return true;
  1673. }
  1674. bool isDone() const { return IsUnsafe; }
  1675. };
  1676. }
  1677. namespace llvm {
  1678. bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) {
  1679. SCEVFindUnsafe Search(SE);
  1680. visitAll(S, Search);
  1681. return !Search.IsUnsafe;
  1682. }
  1683. }