LoopUtils.cpp 61 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648
  1. //===-- LoopUtils.cpp - Loop Utility functions -------------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file defines common loop utility functions.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/Transforms/Utils/LoopUtils.h"
  14. #include "llvm/ADT/ScopeExit.h"
  15. #include "llvm/Analysis/AliasAnalysis.h"
  16. #include "llvm/Analysis/BasicAliasAnalysis.h"
  17. #include "llvm/Analysis/GlobalsModRef.h"
  18. #include "llvm/Analysis/LoopInfo.h"
  19. #include "llvm/Analysis/LoopPass.h"
  20. #include "llvm/Analysis/ScalarEvolution.h"
  21. #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
  22. #include "llvm/Analysis/ScalarEvolutionExpander.h"
  23. #include "llvm/Analysis/ScalarEvolutionExpressions.h"
  24. #include "llvm/Analysis/TargetTransformInfo.h"
  25. #include "llvm/IR/Dominators.h"
  26. #include "llvm/IR/Instructions.h"
  27. #include "llvm/IR/Module.h"
  28. #include "llvm/IR/PatternMatch.h"
  29. #include "llvm/IR/ValueHandle.h"
  30. #include "llvm/Pass.h"
  31. #include "llvm/Support/Debug.h"
  32. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  33. using namespace llvm;
  34. using namespace llvm::PatternMatch;
  35. #define DEBUG_TYPE "loop-utils"
  36. bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
  37. SmallPtrSetImpl<Instruction *> &Set) {
  38. for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
  39. if (!Set.count(dyn_cast<Instruction>(*Use)))
  40. return false;
  41. return true;
  42. }
  43. bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurrenceKind Kind) {
  44. switch (Kind) {
  45. default:
  46. break;
  47. case RK_IntegerAdd:
  48. case RK_IntegerMult:
  49. case RK_IntegerOr:
  50. case RK_IntegerAnd:
  51. case RK_IntegerXor:
  52. case RK_IntegerMinMax:
  53. return true;
  54. }
  55. return false;
  56. }
  57. bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurrenceKind Kind) {
  58. return (Kind != RK_NoRecurrence) && !isIntegerRecurrenceKind(Kind);
  59. }
  60. bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) {
  61. switch (Kind) {
  62. default:
  63. break;
  64. case RK_IntegerAdd:
  65. case RK_IntegerMult:
  66. case RK_FloatAdd:
  67. case RK_FloatMult:
  68. return true;
  69. }
  70. return false;
  71. }
/// Look through an 'and' applied to the recurrence phi with a mask of the
/// form 2^x-1. InstCombine promotes narrow reductions to a legal integer
/// width and inserts such masks; recognizing them lets us evaluate the
/// reduction in the original, narrower type.
///
/// On a match: \p RT is updated to the integer type implied by the mask
/// width, \p Phi is recorded in \p Visited as part of the reduction, the
/// 'and' is added to \p CI (so the cost model can ignore it), and the 'and'
/// instruction is returned. Otherwise \p Phi is returned unchanged.
Instruction *
RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT,
                                     SmallPtrSetImpl<Instruction *> &Visited,
                                     SmallPtrSetImpl<Instruction *> &CI) {
  // The mask must be the phi's only user, otherwise the un-masked value
  // escapes and the demotion would be unsound.
  if (!Phi->hasOneUse())
    return Phi;

  const APInt *M = nullptr;
  Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());

  // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
  // with a new integer type of the corresponding bit width.
  if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) {
    // M is a low-bit mask exactly when M+1 is a power of two; exactLogBase2
    // returns that power (the demoted bit width), or a negative value if not.
    int32_t Bits = (*M + 1).exactLogBase2();
    if (Bits > 0) {
      RT = IntegerType::get(Phi->getContext(), Bits);
      Visited.insert(Phi);
      CI.insert(J);
      return J;
    }
  }
  return Phi;
}
/// Check that every value feeding the reduction expression between \p Start
/// and \p Exit is a single-use sign- or zero-extend from a type no wider
/// than the (possibly demoted) reduction type \p RT, and that all such
/// extends agree in signedness. The common signedness is reported through
/// \p IsSigned. Extends whose source type exactly matches \p RT are added to
/// \p CI so the cost model can treat them as free. \p Visited is the set of
/// instructions known to belong to the reduction. Returns false if any
/// operand violates these constraints.
bool RecurrenceDescriptor::getSourceExtensionKind(
    Instruction *Start, Instruction *Exit, Type *RT, bool &IsSigned,
    SmallPtrSetImpl<Instruction *> &Visited,
    SmallPtrSetImpl<Instruction *> &CI) {

  SmallVector<Instruction *, 8> Worklist;
  bool FoundOneOperand = false;
  unsigned DstSize = RT->getPrimitiveSizeInBits();
  Worklist.push_back(Exit);

  // Traverse the instructions in the reduction expression, beginning with the
  // exit value.
  while (!Worklist.empty()) {
    Instruction *I = Worklist.pop_back_val();
    for (Use &U : I->operands()) {

      // Terminate the traversal if the operand is not an instruction, or we
      // reach the starting value.
      Instruction *J = dyn_cast<Instruction>(U.get());
      if (!J || J == Start)
        continue;

      // Otherwise, investigate the operation if it is also in the expression.
      if (Visited.count(J)) {
        Worklist.push_back(J);
        continue;
      }

      // If the operand is not in Visited, it is not a reduction operation, but
      // it does feed into one. Make sure it is either a single-use sign- or
      // zero-extend instruction.
      CastInst *Cast = dyn_cast<CastInst>(J);
      bool IsSExtInst = isa<SExtInst>(J);
      if (!Cast || !Cast->hasOneUse() || !(isa<ZExtInst>(J) || IsSExtInst))
        return false;

      // Ensure the source type of the extend is no larger than the reduction
      // type. It is not necessary for the types to be identical.
      unsigned SrcSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
      if (SrcSize > DstSize)
        return false;

      // Furthermore, ensure that all such extends are of the same kind.
      if (FoundOneOperand) {
        if (IsSigned != IsSExtInst)
          return false;
      } else {
        FoundOneOperand = true;
        IsSigned = IsSExtInst;
      }

      // Lastly, if the source type of the extend matches the reduction type,
      // add the extend to CI so that we can avoid accounting for it in the
      // cost model.
      if (SrcSize == DstSize)
        CI.insert(Cast);
    }
  }
  return true;
}
/// Check whether \p Phi heads a reduction cycle of kind \p Kind in
/// \p TheLoop. The traversal walks the phi's use-def cycle, requiring every
/// in-cycle operation to match \p Kind and allowing exactly one value to be
/// used outside the loop. On success, \p RedDes is populated with the
/// reduction's description and true is returned.
bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
                                           Loop *TheLoop, bool HasFunNoNaNAttr,
                                           RecurrenceDescriptor &RedDes) {
  if (Phi->getNumIncomingValues() != 2)
    return false;

  // Reduction variables are only found in the loop header block.
  if (Phi->getParent() != TheLoop->getHeader())
    return false;

  // Obtain the reduction start value from the value that comes from the loop
  // preheader.
  Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());

  // ExitInstruction is the single value which is used outside the loop.
  // We only allow for a single reduction value to be used outside the loop.
  // This includes users of the reduction, variables (which form a cycle
  // which ends in the phi node).
  Instruction *ExitInstruction = nullptr;
  // Indicates that we found a reduction operation in our scan.
  bool FoundReduxOp = false;

  // We start with the PHI node and scan for all of the users of this
  // instruction. All users must be instructions that can be used as reduction
  // variables (such as ADD). We must have a single out-of-block user. The cycle
  // must include the original PHI.
  bool FoundStartPHI = false;

  // To recognize min/max patterns formed by a icmp select sequence, we store
  // the number of instruction we saw from the recognized min/max pattern,
  // to make sure we only see exactly the two instructions.
  unsigned NumCmpSelectPatternInst = 0;
  InstDesc ReduxDesc(false, nullptr);

  // Data used for determining if the recurrence has been type-promoted.
  Type *RecurrenceType = Phi->getType();
  SmallPtrSet<Instruction *, 4> CastInsts;
  Instruction *Start = Phi;
  bool IsSigned = false;

  SmallPtrSet<Instruction *, 8> VisitedInsts;
  SmallVector<Instruction *, 8> Worklist;

  // Return early if the recurrence kind does not match the type of Phi. If the
  // recurrence kind is arithmetic, we attempt to look through AND operations
  // resulting from the type promotion performed by InstCombine. Vector
  // operations are not limited to the legal integer widths, so we may be able
  // to evaluate the reduction in the narrower width.
  if (RecurrenceType->isFloatingPointTy()) {
    if (!isFloatingPointRecurrenceKind(Kind))
      return false;
  } else {
    if (!isIntegerRecurrenceKind(Kind))
      return false;
    if (isArithmeticRecurrenceKind(Kind))
      Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
  }

  Worklist.push_back(Start);
  VisitedInsts.insert(Start);

  // A value in the reduction can be used:
  //  - By the reduction:
  //      - Reduction operation:
  //        - One use of reduction value (safe).
  //        - Multiple use of reduction value (not safe).
  //      - PHI:
  //        - All uses of the PHI must be the reduction (safe).
  //        - Otherwise, not safe.
  //  - By instructions outside of the loop (safe).
  //      * One value may have several outside users, but all outside
  //        uses must be of the same value.
  //  - By an instruction that is not part of the reduction (not safe).
  //    This is either:
  //      * An instruction type other than PHI or the reduction operation.
  //      * A PHI in the header other than the initial PHI.
  while (!Worklist.empty()) {
    Instruction *Cur = Worklist.back();
    Worklist.pop_back();

    // No Users.
    // If the instruction has no users then this is a broken chain and can't be
    // a reduction variable.
    if (Cur->use_empty())
      return false;

    bool IsAPhi = isa<PHINode>(Cur);

    // A header PHI use other than the original PHI.
    if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
      return false;

    // Reductions of instructions such as Div, and Sub is only possible if the
    // LHS is the reduction variable.
    if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
        !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
        !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
      return false;

    // Any reduction instruction must be of one of the allowed kinds. We ignore
    // the starting value (the Phi or an AND instruction if the Phi has been
    // type-promoted).
    if (Cur != Start) {
      ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, HasFunNoNaNAttr);
      if (!ReduxDesc.isRecurrence())
        return false;
    }

    // A reduction operation must only have one use of the reduction value.
    if (!IsAPhi && Kind != RK_IntegerMinMax && Kind != RK_FloatMinMax &&
        hasMultipleUsesOf(Cur, VisitedInsts))
      return false;

    // All inputs to a PHI node must be a reduction value.
    if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
      return false;

    if (Kind == RK_IntegerMinMax &&
        (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
      ++NumCmpSelectPatternInst;
    if (Kind == RK_FloatMinMax && (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
      ++NumCmpSelectPatternInst;

    // Check whether we found a reduction operator.
    FoundReduxOp |= !IsAPhi && Cur != Start;

    // Process users of current instruction. Push non-PHI nodes after PHI nodes
    // onto the stack. This way we are going to have seen all inputs to PHI
    // nodes once we get to them.
    SmallVector<Instruction *, 8> NonPHIs;
    SmallVector<Instruction *, 8> PHIs;
    for (User *U : Cur->users()) {
      Instruction *UI = cast<Instruction>(U);

      // Check if we found the exit user.
      BasicBlock *Parent = UI->getParent();
      if (!TheLoop->contains(Parent)) {
        // If we already know this instruction is used externally, move on to
        // the next user.
        if (ExitInstruction == Cur)
          continue;

        // Exit if you find multiple values used outside or if the header phi
        // node is being used. In this case the user uses the value of the
        // previous iteration, in which case we would lose "VF-1" iterations of
        // the reduction operation if we vectorize.
        if (ExitInstruction != nullptr || Cur == Phi)
          return false;

        // The instruction used by an outside user must be the last instruction
        // before we feed back to the reduction phi. Otherwise, we lose VF-1
        // operations on the value.
        if (!is_contained(Phi->operands(), Cur))
          return false;

        ExitInstruction = Cur;
        continue;
      }

      // Process instructions only once (termination). Each reduction cycle
      // value must only be used once, except by phi nodes and min/max
      // reductions which are represented as a cmp followed by a select.
      InstDesc IgnoredVal(false, nullptr);
      if (VisitedInsts.insert(UI).second) {
        if (isa<PHINode>(UI))
          PHIs.push_back(UI);
        else
          NonPHIs.push_back(UI);
      } else if (!isa<PHINode>(UI) &&
                 ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
                   !isa<SelectInst>(UI)) ||
                  !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence()))
        return false;

      // Remember that we completed the cycle.
      if (UI == Phi)
        FoundStartPHI = true;
    }
    Worklist.append(PHIs.begin(), PHIs.end());
    Worklist.append(NonPHIs.begin(), NonPHIs.end());
  }

  // This means we have seen one but not the other instruction of the
  // pattern or more than just a select and cmp.
  if ((Kind == RK_IntegerMinMax || Kind == RK_FloatMinMax) &&
      NumCmpSelectPatternInst != 2)
    return false;

  if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
    return false;

  // If we think Phi may have been type-promoted, we also need to ensure that
  // all source operands of the reduction are either SExtInsts or ZExtInsts. If
  // so, we will be able to evaluate the reduction in the narrower bit width.
  if (Start != Phi)
    if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType,
                                IsSigned, VisitedInsts, CastInsts))
      return false;

  // We found a reduction var if we have reached the original phi node and we
  // only have a single instruction with out-of-loop users.

  // The ExitInstruction(Instruction which is allowed to have out-of-loop users)
  // is saved as part of the RecurrenceDescriptor.

  // Save the description of this reduction variable.
  RecurrenceDescriptor RD(
      RdxStart, ExitInstruction, Kind, ReduxDesc.getMinMaxKind(),
      ReduxDesc.getUnsafeAlgebraInst(), RecurrenceType, IsSigned, CastInsts);

  RedDes = RD;

  return true;
}
/// Returns true if the instruction is a Select(ICmp(X, Y), X, Y) instruction
/// pattern corresponding to a min(X, Y) or max(X, Y).
///
/// \p I may be either the compare or the select of the pair; \p Prev carries
/// the min/max kind recognized so far in the cycle. Returns a descriptor
/// anchored on the select (with the matched min/max kind) on success, or a
/// non-recurrence descriptor on failure.
RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I, InstDesc &Prev) {

  assert((isa<ICmpInst>(I) || isa<FCmpInst>(I) || isa<SelectInst>(I)) &&
         "Expect a select instruction");
  Instruction *Cmp = nullptr;
  SelectInst *Select = nullptr;

  // We must handle the select(cmp()) as a single instruction. Advance to the
  // select.
  if ((Cmp = dyn_cast<ICmpInst>(I)) || (Cmp = dyn_cast<FCmpInst>(I))) {
    // The compare must feed exactly one select; keep the kind Prev already
    // established (the select will classify it).
    if (!Cmp->hasOneUse() || !(Select = dyn_cast<SelectInst>(*I->user_begin())))
      return InstDesc(false, I);
    return InstDesc(Select, Prev.getMinMaxKind());
  }

  // Only handle single use cases for now.
  if (!(Select = dyn_cast<SelectInst>(I)))
    return InstDesc(false, I);
  if (!(Cmp = dyn_cast<ICmpInst>(I->getOperand(0))) &&
      !(Cmp = dyn_cast<FCmpInst>(I->getOperand(0))))
    return InstDesc(false, I);
  if (!Cmp->hasOneUse())
    return InstDesc(false, I);

  Value *CmpLeft;
  Value *CmpRight;

  // Look for a min/max pattern. Ordered and unordered FP variants map to the
  // same recurrence kind.
  if (m_UMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_UIntMin);
  else if (m_UMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_UIntMax);
  else if (m_SMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_SIntMax);
  else if (m_SMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_SIntMin);
  else if (m_OrdFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_FloatMin);
  else if (m_OrdFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_FloatMax);
  else if (m_UnordFMin(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_FloatMin);
  else if (m_UnordFMax(m_Value(CmpLeft), m_Value(CmpRight)).match(Select))
    return InstDesc(Select, MRK_FloatMax);

  return InstDesc(false, I);
}
/// Classify instruction \p I with respect to recurrence kind \p Kind: return
/// a descriptor marking it as a valid recurrence operation or not. \p Prev
/// threads state from previously classified instructions of the same cycle
/// (the min/max kind and any unsafe-algebra instruction already found).
RecurrenceDescriptor::InstDesc
RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurrenceKind Kind,
                                        InstDesc &Prev, bool HasFunNoNaNAttr) {
  bool FP = I->getType()->isFloatingPointTy();
  Instruction *UAI = Prev.getUnsafeAlgebraInst();
  // An FP operation without full fast-math flags blocks vectorization of the
  // reduction; remember the first such instruction.
  if (!UAI && FP && !I->isFast())
    UAI = I; // Found an unsafe (unvectorizable) algebra instruction.

  switch (I->getOpcode()) {
  default:
    return InstDesc(false, I);
  case Instruction::PHI:
    // PHIs are always acceptable links in the cycle; propagate Prev's state.
    return InstDesc(I, Prev.getMinMaxKind(), Prev.getUnsafeAlgebraInst());
  case Instruction::Sub:
  case Instruction::Add:
    return InstDesc(Kind == RK_IntegerAdd, I);
  case Instruction::Mul:
    return InstDesc(Kind == RK_IntegerMult, I);
  case Instruction::And:
    return InstDesc(Kind == RK_IntegerAnd, I);
  case Instruction::Or:
    return InstDesc(Kind == RK_IntegerOr, I);
  case Instruction::Xor:
    return InstDesc(Kind == RK_IntegerXor, I);
  case Instruction::FMul:
    return InstDesc(Kind == RK_FloatMult, I, UAI);
  case Instruction::FSub:
  case Instruction::FAdd:
    return InstDesc(Kind == RK_FloatAdd, I, UAI);
  case Instruction::FCmp:
  case Instruction::ICmp:
  case Instruction::Select:
    // FP min/max is only allowed when the function promises no NaNs.
    if (Kind != RK_IntegerMinMax &&
        (!HasFunNoNaNAttr || Kind != RK_FloatMinMax))
      return InstDesc(false, I);
    return isMinMaxSelectCmpPattern(I, Prev);
  }
}
  406. bool RecurrenceDescriptor::hasMultipleUsesOf(
  407. Instruction *I, SmallPtrSetImpl<Instruction *> &Insts) {
  408. unsigned NumUses = 0;
  409. for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E;
  410. ++Use) {
  411. if (Insts.count(dyn_cast<Instruction>(*Use)))
  412. ++NumUses;
  413. if (NumUses > 1)
  414. return true;
  415. }
  416. return false;
  417. }
/// Try to recognize \p Phi as a reduction of any supported kind, testing each
/// kind in turn; the first kind for which AddReductionVar succeeds fills in
/// \p RedDes and returns true.
bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
                                          RecurrenceDescriptor &RedDes) {

  BasicBlock *Header = TheLoop->getHeader();
  Function &F = *Header->getParent();
  // FP min/max reductions are only legal when NaNs are excluded by the
  // function's fast-math attributes.
  bool HasFunNoNaNAttr =
      F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";

  if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes)) {
    DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes)) {
    DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes)) {
    DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes)) {
    DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes)) {
    DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr,
                      RedDes)) {
    DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes)) {
    DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes)) {
    DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
    return true;
  }
  if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes)) {
    DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi << "\n");
    return true;
  }
  // Not a reduction of known type.
  return false;
}
/// Return true if \p Phi is a first-order recurrence: a header phi whose
/// latch-incoming value is a non-phi loop instruction ("Previous") that
/// dominates every user of the phi. A single cast user that is only blocked
/// by dominance may instead be scheduled to sink after Previous; such pairs
/// are recorded in \p SinkAfter for the vectorizer to move.
bool RecurrenceDescriptor::isFirstOrderRecurrence(
    PHINode *Phi, Loop *TheLoop,
    DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {

  // Ensure the phi node is in the loop header and has two incoming values.
  if (Phi->getParent() != TheLoop->getHeader() ||
      Phi->getNumIncomingValues() != 2)
    return false;

  // Ensure the loop has a preheader and a single latch block. The loop
  // vectorizer will need the latch to set up the next iteration of the loop.
  auto *Preheader = TheLoop->getLoopPreheader();
  auto *Latch = TheLoop->getLoopLatch();
  if (!Preheader || !Latch)
    return false;

  // Ensure the phi node's incoming blocks are the loop preheader and latch.
  if (Phi->getBasicBlockIndex(Preheader) < 0 ||
      Phi->getBasicBlockIndex(Latch) < 0)
    return false;

  // Get the previous value. The previous value comes from the latch edge while
  // the initial value comes from the preheader edge.
  auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
  if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
      SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
    return false;

  // Ensure every user of the phi node is dominated by the previous value.
  // The dominance requirement ensures the loop vectorizer will not need to
  // vectorize the initial value prior to the first iteration of the loop.
  // TODO: Consider extending this sinking to handle other kinds of instructions
  // and expressions, beyond sinking a single cast past Previous.
  if (Phi->hasOneUse()) {
    auto *I = Phi->user_back();
    // A single-use cast in the header whose own user is dominated by Previous
    // can be sunk to just after Previous instead of failing the check.
    if (I->isCast() && (I->getParent() == Phi->getParent()) && I->hasOneUse() &&
        DT->dominates(Previous, I->user_back())) {
      if (!DT->dominates(Previous, I)) // Otherwise we're good w/o sinking.
        SinkAfter[I] = Previous;
      return true;
    }
  }

  for (User *U : Phi->users())
    if (auto *I = dyn_cast<Instruction>(U)) {
      if (!DT->dominates(Previous, I))
        return false;
    }

  return true;
}
  508. /// This function returns the identity element (or neutral element) for
  509. /// the operation K.
  510. Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurrenceKind K,
  511. Type *Tp) {
  512. switch (K) {
  513. case RK_IntegerXor:
  514. case RK_IntegerAdd:
  515. case RK_IntegerOr:
  516. // Adding, Xoring, Oring zero to a number does not change it.
  517. return ConstantInt::get(Tp, 0);
  518. case RK_IntegerMult:
  519. // Multiplying a number by 1 does not change it.
  520. return ConstantInt::get(Tp, 1);
  521. case RK_IntegerAnd:
  522. // AND-ing a number with an all-1 value does not change it.
  523. return ConstantInt::get(Tp, -1, true);
  524. case RK_FloatMult:
  525. // Multiplying a number by 1 does not change it.
  526. return ConstantFP::get(Tp, 1.0L);
  527. case RK_FloatAdd:
  528. // Adding zero to a number does not change it.
  529. return ConstantFP::get(Tp, 0.0L);
  530. default:
  531. llvm_unreachable("Unknown recurrence kind");
  532. }
  533. }
  534. /// This function translates the recurrence kind to an LLVM binary operator.
  535. unsigned RecurrenceDescriptor::getRecurrenceBinOp(RecurrenceKind Kind) {
  536. switch (Kind) {
  537. case RK_IntegerAdd:
  538. return Instruction::Add;
  539. case RK_IntegerMult:
  540. return Instruction::Mul;
  541. case RK_IntegerOr:
  542. return Instruction::Or;
  543. case RK_IntegerAnd:
  544. return Instruction::And;
  545. case RK_IntegerXor:
  546. return Instruction::Xor;
  547. case RK_FloatMult:
  548. return Instruction::FMul;
  549. case RK_FloatAdd:
  550. return Instruction::FAdd;
  551. case RK_IntegerMinMax:
  552. return Instruction::ICmp;
  553. case RK_FloatMinMax:
  554. return Instruction::FCmp;
  555. default:
  556. llvm_unreachable("Unknown recurrence operation");
  557. }
  558. }
  559. Value *RecurrenceDescriptor::createMinMaxOp(IRBuilder<> &Builder,
  560. MinMaxRecurrenceKind RK,
  561. Value *Left, Value *Right) {
  562. CmpInst::Predicate P = CmpInst::ICMP_NE;
  563. switch (RK) {
  564. default:
  565. llvm_unreachable("Unknown min/max recurrence kind");
  566. case MRK_UIntMin:
  567. P = CmpInst::ICMP_ULT;
  568. break;
  569. case MRK_UIntMax:
  570. P = CmpInst::ICMP_UGT;
  571. break;
  572. case MRK_SIntMin:
  573. P = CmpInst::ICMP_SLT;
  574. break;
  575. case MRK_SIntMax:
  576. P = CmpInst::ICMP_SGT;
  577. break;
  578. case MRK_FloatMin:
  579. P = CmpInst::FCMP_OLT;
  580. break;
  581. case MRK_FloatMax:
  582. P = CmpInst::FCMP_OGT;
  583. break;
  584. }
  585. // We only match FP sequences that are 'fast', so we can unconditionally
  586. // set it on any generated instructions.
  587. IRBuilder<>::FastMathFlagGuard FMFG(Builder);
  588. FastMathFlags FMF;
  589. FMF.setFast();
  590. Builder.setFastMathFlags(FMF);
  591. Value *Cmp;
  592. if (RK == MRK_FloatMin || RK == MRK_FloatMax)
  593. Cmp = Builder.CreateFCmp(P, Left, Right, "rdx.minmax.cmp");
  594. else
  595. Cmp = Builder.CreateICmp(P, Left, Right, "rdx.minmax.cmp");
  596. Value *Select = Builder.CreateSelect(Cmp, Left, Right, "rdx.minmax.select");
  597. return Select;
  598. }
// Builds an induction descriptor and sanity-checks, via asserts, that the
// start value, step, and (for FP inductions) the update binary operator are
// consistent with the induction kind K.
InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
                                         const SCEV *Step, BinaryOperator *BOp,
                                         SmallVectorImpl<Instruction *> *Casts)
    : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
  assert(IK != IK_NoInduction && "Not an induction");

  // Start value type should match the induction kind and the value
  // itself should not be null.
  assert(StartValue && "StartValue is null");
  assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
         "StartValue is not a pointer for pointer induction");
  assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
         "StartValue is not an integer for integer induction");

  // Check the Step Value. It should be non-zero integer value.
  assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
         "Step value is zero");

  assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
         "Step value should be constant for pointer induction");
  assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
         "StepValue is not an integer");

  assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
         "StepValue is not FP for FpInduction");
  assert((IK != IK_FpInduction || (InductionBinOp &&
          (InductionBinOp->getOpcode() == Instruction::FAdd ||
           InductionBinOp->getOpcode() == Instruction::FSub))) &&
         "Binary opcode should be specified for FP induction");

  // Remember the cast instructions the caller proved redundant (see
  // getCastsForInductionPHI). A null Casts leaves the list empty.
  if (Casts) {
    for (auto &Inst : *Casts) {
      RedundantCasts.push_back(Inst);
    }
  }
}
  630. int InductionDescriptor::getConsecutiveDirection() const {
  631. ConstantInt *ConstStep = getConstIntStepValue();
  632. if (ConstStep && (ConstStep->isOne() || ConstStep->isMinusOne()))
  633. return ConstStep->getSExtValue();
  634. return 0;
  635. }
  636. ConstantInt *InductionDescriptor::getConstIntStepValue() const {
  637. if (isa<SCEVConstant>(Step))
  638. return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
  639. return nullptr;
  640. }
// Emits IR at B's insertion point computing the induction's value at
// iteration `Index` (i.e. Start + Index * Step, in the form appropriate for
// the induction kind), and returns that value.
Value *InductionDescriptor::transform(IRBuilder<> &B, Value *Index,
                                      ScalarEvolution *SE,
                                      const DataLayout& DL) const {
  // Expander used to materialize SCEV expressions as IR at the builder's
  // current insertion point.
  SCEVExpander Exp(*SE, DL, "induction");
  assert(Index->getType() == Step->getType() &&
         "Index type does not match StepValue type");

  switch (IK) {
  case IK_IntInduction: {
    assert(Index->getType() == StartValue->getType() &&
           "Index type does not match StartValue type");

    // FIXME: Theoretically, we can call getAddExpr() of ScalarEvolution
    // and calculate (Start + Index * Step) for all cases, without
    // special handling for "isOne" and "isMinusOne".
    // But in the real life the result code getting worse. We mix SCEV
    // expressions and ADD/SUB operations and receive redundant
    // intermediate values being calculated in different ways and
    // Instcombine is unable to reduce them all.

    // Unit strides: emit a plain add/sub, which folds better downstream.
    if (getConstIntStepValue() &&
        getConstIntStepValue()->isMinusOne())
      return B.CreateSub(StartValue, Index);
    if (getConstIntStepValue() &&
        getConstIntStepValue()->isOne())
      return B.CreateAdd(StartValue, Index);

    // General case: expand Start + Index * Step via SCEV.
    const SCEV *S = SE->getAddExpr(SE->getSCEV(StartValue),
                                   SE->getMulExpr(Step, SE->getSCEV(Index)));
    return Exp.expandCodeFor(S, StartValue->getType(), &*B.GetInsertPoint());
  }
  case IK_PtrInduction: {
    assert(isa<SCEVConstant>(Step) &&
           "Expected constant step for pointer induction");
    // Scale the index by the step (in elements), then GEP off the start
    // pointer.
    const SCEV *S = SE->getMulExpr(SE->getSCEV(Index), Step);
    Index = Exp.expandCodeFor(S, Index->getType(), &*B.GetInsertPoint());
    return B.CreateGEP(nullptr, StartValue, Index);
  }
  case IK_FpInduction: {
    assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value");
    assert(InductionBinOp &&
           (InductionBinOp->getOpcode() == Instruction::FAdd ||
            InductionBinOp->getOpcode() == Instruction::FSub) &&
           "Original bin op should be defined for FP induction");

    Value *StepValue = cast<SCEVUnknown>(Step)->getValue();

    // Floating point operations had to be 'fast' to enable the induction.
    FastMathFlags Flags;
    Flags.setFast();

    Value *MulExp = B.CreateFMul(StepValue, Index);
    if (isa<Instruction>(MulExp))
      // We have to check, the MulExp may be a constant.
      cast<Instruction>(MulExp)->setFastMathFlags(Flags);

    // Re-create the original FAdd/FSub so the induction's direction is
    // preserved.
    Value *BOp = B.CreateBinOp(InductionBinOp->getOpcode() , StartValue,
                               MulExp, "induction");
    if (isa<Instruction>(BOp))
      cast<Instruction>(BOp)->setFastMathFlags(Flags);

    return BOp;
  }
  case IK_NoInduction:
    return nullptr;
  }
  llvm_unreachable("invalid enum");
}
  700. bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
  701. ScalarEvolution *SE,
  702. InductionDescriptor &D) {
  703. // Here we only handle FP induction variables.
  704. assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");
  705. if (TheLoop->getHeader() != Phi->getParent())
  706. return false;
  707. // The loop may have multiple entrances or multiple exits; we can analyze
  708. // this phi if it has a unique entry value and a unique backedge value.
  709. if (Phi->getNumIncomingValues() != 2)
  710. return false;
  711. Value *BEValue = nullptr, *StartValue = nullptr;
  712. if (TheLoop->contains(Phi->getIncomingBlock(0))) {
  713. BEValue = Phi->getIncomingValue(0);
  714. StartValue = Phi->getIncomingValue(1);
  715. } else {
  716. assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
  717. "Unexpected Phi node in the loop");
  718. BEValue = Phi->getIncomingValue(1);
  719. StartValue = Phi->getIncomingValue(0);
  720. }
  721. BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue);
  722. if (!BOp)
  723. return false;
  724. Value *Addend = nullptr;
  725. if (BOp->getOpcode() == Instruction::FAdd) {
  726. if (BOp->getOperand(0) == Phi)
  727. Addend = BOp->getOperand(1);
  728. else if (BOp->getOperand(1) == Phi)
  729. Addend = BOp->getOperand(0);
  730. } else if (BOp->getOpcode() == Instruction::FSub)
  731. if (BOp->getOperand(0) == Phi)
  732. Addend = BOp->getOperand(1);
  733. if (!Addend)
  734. return false;
  735. // The addend should be loop invariant
  736. if (auto *I = dyn_cast<Instruction>(Addend))
  737. if (TheLoop->contains(I))
  738. return false;
  739. // FP Step has unknown SCEV
  740. const SCEV *Step = SE->getUnknown(Addend);
  741. D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
  742. return true;
  743. }
  744. /// This function is called when we suspect that the update-chain of a phi node
/// (whose symbolic SCEV expression is in \p PhiScev) contains redundant casts,
  746. /// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
  747. /// predicate P under which the SCEV expression for the phi can be the
  748. /// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the
  749. /// cast instructions that are involved in the update-chain of this induction.
  750. /// A caller that adds the required runtime predicate can be free to drop these
  751. /// cast instructions, and compute the phi using \p AR (instead of some scev
  752. /// expression with casts).
  753. ///
  754. /// For example, without a predicate the scev expression can take the following
  755. /// form:
  756. /// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
  757. ///
  758. /// It corresponds to the following IR sequence:
  759. /// %for.body:
  760. /// %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
  761. /// %casted_phi = "ExtTrunc i64 %x"
  762. /// %add = add i64 %casted_phi, %step
  763. ///
  764. /// where %x is given in \p PN,
  765. /// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,
  766. /// and the IR sequence that "ExtTrunc i64 %x" represents can take one of
  767. /// several forms, for example, such as:
  768. /// ExtTrunc1: %casted_phi = and %x, 2^n-1
  769. /// or:
  770. /// ExtTrunc2: %t = shl %x, m
  771. /// %casted_phi = ashr %t, m
  772. ///
  773. /// If we are able to find such sequence, we return the instructions
  774. /// we found, namely %casted_phi and the instructions on its use-def chain up
  775. /// to the phi (not including the phi).
static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE,
                                    const SCEVUnknown *PhiScev,
                                    const SCEVAddRecExpr *AR,
                                    SmallVectorImpl<Instruction *> &CastInsts) {

  assert(CastInsts.empty() && "CastInsts is expected to be empty.");
  auto *PN = cast<PHINode>(PhiScev->getValue());
  assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
  const Loop *L = AR->getLoop();

  // Find any cast instructions that participate in the def-use chain of
  // PhiScev in the loop.
  // FORNOW/TODO: We currently expect the def-use chain to include only
  // two-operand instructions, where one of the operands is an invariant.
  // createAddRecFromPHIWithCasts() currently does not support anything more
  // involved than that, so we keep the search simple. This can be
  // extended/generalized as needed.

  // Given a two-operand instruction, returns the operand that is NOT loop
  // invariant (i.e. the next link to follow on the def-use chain), or null
  // if Val is not such an instruction.
  auto getDef = [&](const Value *Val) -> Value * {
    const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
    if (!BinOp)
      return nullptr;
    Value *Op0 = BinOp->getOperand(0);
    Value *Op1 = BinOp->getOperand(1);
    Value *Def = nullptr;
    if (L->isLoopInvariant(Op0))
      Def = Op1;
    else if (L->isLoopInvariant(Op1))
      Def = Op0;
    return Def;
  };

  // Look for the instruction that defines the induction via the
  // loop backedge.
  BasicBlock *Latch = L->getLoopLatch();
  if (!Latch)
    return false;
  Value *Val = PN->getIncomingValueForBlock(Latch);
  if (!Val)
    return false;

  // Follow the def-use chain until the induction phi is reached.
  // If on the way we encounter a Value that has the same SCEV Expr as the
  // phi node, we can consider the instructions we visit from that point
  // as part of the cast-sequence that can be ignored.
  bool InCastSequence = false;
  auto *Inst = dyn_cast<Instruction>(Val);
  while (Val != PN) {
    // If we encountered a phi node other than PN, or if we left the loop,
    // we bail out.
    if (!Inst || !L->contains(Inst)) {
      return false;
    }
    auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
    if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
      InCastSequence = true;
    if (InCastSequence) {
      // Only the last instruction in the cast sequence is expected to have
      // uses outside the induction def-use chain.
      if (!CastInsts.empty())
        if (!Inst->hasOneUse())
          return false;
      CastInsts.push_back(Inst);
    }
    Val = getDef(Val);
    if (!Val)
      return false;
    Inst = dyn_cast<Instruction>(Val);
  }

  return InCastSequence;
}
// Predicated-SCEV entry point for induction recognition. Dispatches FP phis
// to isFPInductionPHI, optionally builds an AddRec under runtime assumptions
// (when Assume is set), and records any redundant casts discovered along the
// induction's update chain before delegating to the plain-SCEV overload.
bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
                                         PredicatedScalarEvolution &PSE,
                                         InductionDescriptor &D,
                                         bool Assume) {
  Type *PhiTy = Phi->getType();

  // Handle integer and pointer inductions variables.
  // Now we handle also FP induction but not trying to make a
  // recurrent expression from the PHI node in-place.
  if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() &&
      !PhiTy->isFloatTy() && !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
    return false;

  // FP phis are recognized by pattern-matching the update chain directly,
  // since FP steps have no precise SCEV representation.
  if (PhiTy->isFloatingPointTy())
    return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);

  const SCEV *PhiScev = PSE.getSCEV(Phi);
  const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);

  // We need this expression to be an AddRecExpr.
  if (Assume && !AR)
    AR = PSE.getAsAddRec(Phi);

  if (!AR) {
    DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
    return false;
  }

  // Record any Cast instructions that participate in the induction update.
  const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
  // If we started from an UnknownSCEV, and managed to build an addRecurrence
  // only after enabling Assume with PSCEV, this means we may have encountered
  // cast instructions that required adding a runtime check in order to
  // guarantee the correctness of the AddRecurrence representation of the
  // induction.
  if (PhiScev != AR && SymbolicPhi) {
    SmallVector<Instruction *, 2> Casts;
    if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
      return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
  }

  return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
}
  878. bool InductionDescriptor::isInductionPHI(
  879. PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
  880. InductionDescriptor &D, const SCEV *Expr,
  881. SmallVectorImpl<Instruction *> *CastsToIgnore) {
  882. Type *PhiTy = Phi->getType();
  883. // We only handle integer and pointer inductions variables.
  884. if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
  885. return false;
  886. // Check that the PHI is consecutive.
  887. const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
  888. const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
  889. if (!AR) {
  890. DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
  891. return false;
  892. }
  893. if (AR->getLoop() != TheLoop) {
  894. // FIXME: We should treat this as a uniform. Unfortunately, we
  895. // don't currently know how to handled uniform PHIs.
  896. DEBUG(dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
  897. return false;
  898. }
  899. Value *StartValue =
  900. Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
  901. const SCEV *Step = AR->getStepRecurrence(*SE);
  902. // Calculate the pointer stride and check if it is consecutive.
  903. // The stride may be a constant or a loop invariant integer value.
  904. const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
  905. if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
  906. return false;
  907. if (PhiTy->isIntegerTy()) {
  908. D = InductionDescriptor(StartValue, IK_IntInduction, Step, /*BOp=*/ nullptr,
  909. CastsToIgnore);
  910. return true;
  911. }
  912. assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
  913. // Pointer induction should be a constant.
  914. if (!ConstStep)
  915. return false;
  916. ConstantInt *CV = ConstStep->getValue();
  917. Type *PointerElementType = PhiTy->getPointerElementType();
  918. // The pointer stride cannot be determined if the pointer element type is not
  919. // sized.
  920. if (!PointerElementType->isSized())
  921. return false;
  922. const DataLayout &DL = Phi->getModule()->getDataLayout();
  923. int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
  924. if (!Size)
  925. return false;
  926. int64_t CVSize = CV->getSExtValue();
  927. if (CVSize % Size)
  928. return false;
  929. auto *StepValue = SE->getConstant(CV->getType(), CVSize / Size,
  930. true /* signed */);
  931. D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue);
  932. return true;
  933. }
// Ensures each exit block of \p L has only in-loop predecessors by splitting
// off a dedicated ".loopexit" block where needed. Returns true if the CFG
// was changed.
bool llvm::formDedicatedExitBlocks(Loop *L, DominatorTree *DT, LoopInfo *LI,
                                   bool PreserveLCSSA) {
  bool Changed = false;

  // We re-use a vector for the in-loop predecessors.
  SmallVector<BasicBlock *, 4> InLoopPredecessors;

  // Rewrites one exit block; returns true if the CFG changed. The scope_exit
  // guarantees the shared predecessor vector is cleared on every exit path.
  auto RewriteExit = [&](BasicBlock *BB) {
    assert(InLoopPredecessors.empty() &&
           "Must start with an empty predecessors list!");
    auto Cleanup = make_scope_exit([&] { InLoopPredecessors.clear(); });

    // See if there are any non-loop predecessors of this exit block and
    // keep track of the in-loop predecessors.
    bool IsDedicatedExit = true;
    for (auto *PredBB : predecessors(BB))
      if (L->contains(PredBB)) {
        if (isa<IndirectBrInst>(PredBB->getTerminator()))
          // We cannot rewrite exiting edges from an indirectbr.
          return false;

        InLoopPredecessors.push_back(PredBB);
      } else {
        IsDedicatedExit = false;
      }

    assert(!InLoopPredecessors.empty() && "Must have *some* loop predecessor!");

    // Nothing to do if this is already a dedicated exit.
    if (IsDedicatedExit)
      return false;

    auto *NewExitBB = SplitBlockPredecessors(
        BB, InLoopPredecessors, ".loopexit", DT, LI, PreserveLCSSA);

    if (!NewExitBB)
      DEBUG(dbgs() << "WARNING: Can't create a dedicated exit block for loop: "
                   << *L << "\n");
    else
      DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block "
                   << NewExitBB->getName() << "\n");
    return true;
  };

  // Walk the exit blocks directly rather than building up a data structure for
  // them, but only visit each one once.
  SmallPtrSet<BasicBlock *, 4> Visited;
  for (auto *BB : L->blocks())
    for (auto *SuccBB : successors(BB)) {
      // We're looking for exit blocks so skip in-loop successors.
      if (L->contains(SuccBB))
        continue;

      // Visit each exit block exactly once.
      if (!Visited.insert(SuccBB).second)
        continue;

      Changed |= RewriteExit(SuccBB);
    }

  return Changed;
}
  984. /// \brief Returns the instructions that use values defined in the loop.
  985. SmallVector<Instruction *, 8> llvm::findDefsUsedOutsideOfLoop(Loop *L) {
  986. SmallVector<Instruction *, 8> UsedOutside;
  987. for (auto *Block : L->getBlocks())
  988. // FIXME: I believe that this could use copy_if if the Inst reference could
  989. // be adapted into a pointer.
  990. for (auto &Inst : *Block) {
  991. auto Users = Inst.users();
  992. if (any_of(Users, [&](User *U) {
  993. auto *Use = cast<Instruction>(U);
  994. return !L->contains(Use->getParent());
  995. }))
  996. UsedOutside.push_back(&Inst);
  997. }
  998. return UsedOutside;
  999. }
// Declares the analysis requirements and preservation guarantees shared by
// all legacy-PM loop passes.
void llvm::getLoopAnalysisUsage(AnalysisUsage &AU) {
  // By definition, all loop passes need the LoopInfo analysis and the
  // Dominator tree it depends on. Because they all participate in the loop
  // pass manager, they must also preserve these.
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addPreserved<DominatorTreeWrapperPass>();
  AU.addRequired<LoopInfoWrapperPass>();
  AU.addPreserved<LoopInfoWrapperPass>();

  // We must also preserve LoopSimplify and LCSSA. We locally access their IDs
  // here because users shouldn't directly get them from this header.
  extern char &LoopSimplifyID;
  extern char &LCSSAID;
  AU.addRequiredID(LoopSimplifyID);
  AU.addPreservedID(LoopSimplifyID);
  AU.addRequiredID(LCSSAID);
  AU.addPreservedID(LCSSAID);
  // This is used in the LPPassManager to perform LCSSA verification on passes
  // which preserve lcssa form.
  AU.addRequired<LCSSAVerificationPass>();
  AU.addPreserved<LCSSAVerificationPass>();

  // Loop passes are designed to run inside of a loop pass manager which means
  // that any function analyses they require must be required by the first loop
  // pass in the manager (so that it is computed before the loop pass manager
  // runs) and preserved by all loop passes in the manager. To make this
  // reasonably robust, the set needed for most loop passes is maintained here.
  // If your loop pass requires an analysis not listed here, you will need to
  // carefully audit the loop pass manager nesting structure that results.
  AU.addRequired<AAResultsWrapperPass>();
  AU.addPreserved<AAResultsWrapperPass>();
  AU.addPreserved<BasicAAWrapperPass>();
  AU.addPreserved<GlobalsAAWrapperPass>();
  AU.addPreserved<SCEVAAWrapperPass>();
  AU.addRequired<ScalarEvolutionWrapperPass>();
  AU.addPreserved<ScalarEvolutionWrapperPass>();
}
/// Manually defined generic "LoopPass" dependency initialization. This is used
/// to initialize the exact set of passes from above in \c
/// getLoopAnalysisUsage. It can be used within a loop pass's initialization
/// with:
///
/// INITIALIZE_PASS_DEPENDENCY(LoopPass)
///
/// As-if "LoopPass" were a pass.
void llvm::initializeLoopPassPass(PassRegistry &Registry) {
  // Keep this list in sync with the analyses declared in getLoopAnalysisUsage.
  INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
  INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(BasicAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(GlobalsAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(SCEVAAWrapperPass)
  INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
}
  1054. /// \brief Find string metadata for loop
  1055. ///
  1056. /// If it has a value (e.g. {"llvm.distribute", 1} return the value as an
  1057. /// operand or null otherwise. If the string metadata is not found return
  1058. /// Optional's not-a-value.
  1059. Optional<const MDOperand *> llvm::findStringMetadataForLoop(Loop *TheLoop,
  1060. StringRef Name) {
  1061. MDNode *LoopID = TheLoop->getLoopID();
  1062. // Return none if LoopID is false.
  1063. if (!LoopID)
  1064. return None;
  1065. // First operand should refer to the loop id itself.
  1066. assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
  1067. assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
  1068. // Iterate over LoopID operands and look for MDString Metadata
  1069. for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
  1070. MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
  1071. if (!MD)
  1072. continue;
  1073. MDString *S = dyn_cast<MDString>(MD->getOperand(0));
  1074. if (!S)
  1075. continue;
  1076. // Return true if MDString holds expected MetaData.
  1077. if (Name.equals(S->getString()))
  1078. switch (MD->getNumOperands()) {
  1079. case 1:
  1080. return nullptr;
  1081. case 2:
  1082. return &MD->getOperand(1);
  1083. default:
  1084. llvm_unreachable("loop metadata has 0 or 1 operand");
  1085. }
  1086. }
  1087. return None;
  1088. }
  1089. /// Does a BFS from a given node to all of its children inside a given loop.
  1090. /// The returned vector of nodes includes the starting point.
  1091. SmallVector<DomTreeNode *, 16>
  1092. llvm::collectChildrenInLoop(DomTreeNode *N, const Loop *CurLoop) {
  1093. SmallVector<DomTreeNode *, 16> Worklist;
  1094. auto AddRegionToWorklist = [&](DomTreeNode *DTN) {
  1095. // Only include subregions in the top level loop.
  1096. BasicBlock *BB = DTN->getBlock();
  1097. if (CurLoop->contains(BB))
  1098. Worklist.push_back(DTN);
  1099. };
  1100. AddRegionToWorklist(N);
  1101. for (size_t I = 0; I < Worklist.size(); I++)
  1102. for (DomTreeNode *Child : Worklist[I]->getChildren())
  1103. AddRegionToWorklist(Child);
  1104. return Worklist;
  1105. }
// Deletes a loop that is known to be dead: redirects the preheader to the
// loop's unique exit block, fixes the exit phis, and erases the loop's
// blocks, updating DT/SE/LI when provided. The loop must be in LCSSA form
// with a preheader and dedicated exits.
void llvm::deleteDeadLoop(Loop *L, DominatorTree *DT = nullptr,
                          ScalarEvolution *SE = nullptr,
                          LoopInfo *LI = nullptr) {
  assert((!DT || L->isLCSSAForm(*DT)) && "Expected LCSSA!");
  auto *Preheader = L->getLoopPreheader();
  assert(Preheader && "Preheader should exist!");

  // Now that we know the removal is safe, remove the loop by changing the
  // branch from the preheader to go to the single exit block.
  //
  // Because we're deleting a large chunk of code at once, the sequence in which
  // we remove things is very important to avoid invalidation issues.

  // Tell ScalarEvolution that the loop is deleted. Do this before
  // deleting the loop so that ScalarEvolution can look at the loop
  // to determine what it needs to clean up.
  if (SE)
    SE->forgetLoop(L);

  auto *ExitBlock = L->getUniqueExitBlock();
  assert(ExitBlock && "Should have a unique exit block!");
  assert(L->hasDedicatedExits() && "Loop should have dedicated exits!");

  auto *OldBr = dyn_cast<BranchInst>(Preheader->getTerminator());
  assert(OldBr && "Preheader must end with a branch");
  assert(OldBr->isUnconditional() && "Preheader must have a single successor");

  // Connect the preheader to the exit block. Keep the old edge to the header
  // around to perform the dominator tree update in two separate steps
  // -- #1 insertion of the edge preheader -> exit and #2 deletion of the edge
  // preheader -> header.
  //
  //
  // 0. Preheader          1. Preheader           2. Preheader
  //        |                    |   |                   |
  //        V                    |   V                   |
  //      Header <--\            | Header <--\           | Header <--\
  //       |  |     |            |  |  |     |           |  |  |     |
  //       |  V     |            |  |  V     |           |  |  V     |
  //       | Body --/            |  | Body --/           |  | Body --/
  //       V                     V  V                    V  V
  //      Exit                   Exit                    Exit
  //
  // By doing this is two separate steps we can perform the dominator tree
  // update without using the batch update API.
  //
  // Even when the loop is never executed, we cannot remove the edge from the
  // source block to the exit block. Consider the case where the unexecuted loop
  // branches back to an outer loop. If we deleted the loop and removed the edge
  // coming to this inner loop, this will break the outer loop structure (by
  // deleting the backedge of the outer loop). If the outer loop is indeed a
  // non-loop, it will be deleted in a future iteration of loop deletion pass.
  IRBuilder<> Builder(OldBr);
  Builder.CreateCondBr(Builder.getFalse(), L->getHeader(), ExitBlock);
  // Remove the old branch. The conditional branch becomes a new terminator.
  OldBr->eraseFromParent();

  // Rewrite phis in the exit block to get their inputs from the Preheader
  // instead of the exiting block.
  for (PHINode &P : ExitBlock->phis()) {
    // Set the zero'th element of Phi to be from the preheader and remove all
    // other incoming values. Given the loop has dedicated exits, all other
    // incoming values must be from the exiting blocks.
    int PredIndex = 0;
    P.setIncomingBlock(PredIndex, Preheader);
    // Removes all incoming values from all other exiting blocks (including
    // duplicate values from an exiting block).
    // Nuke all entries except the zero'th entry which is the preheader entry.
    // NOTE! We need to remove Incoming Values in the reverse order as done
    // below, to keep the indices valid for deletion (removeIncomingValues
    // updates getNumIncomingValues and shifts all values down into the operand
    // being deleted).
    for (unsigned i = 0, e = P.getNumIncomingValues() - 1; i != e; ++i)
      P.removeIncomingValue(e - i, false);

    assert((P.getNumIncomingValues() == 1 &&
            P.getIncomingBlock(PredIndex) == Preheader) &&
           "Should have exactly one value and that's from the preheader!");
  }

  // Disconnect the loop body by branching directly to its exit.
  Builder.SetInsertPoint(Preheader->getTerminator());
  Builder.CreateBr(ExitBlock);
  // Remove the old branch.
  Preheader->getTerminator()->eraseFromParent();

  if (DT) {
    // Update the dominator tree by informing it about the new edge from the
    // preheader to the exit.
    DT->insertEdge(Preheader, ExitBlock);
    // Inform the dominator tree about the removed edge.
    DT->deleteEdge(Preheader, L->getHeader());
  }

  // Remove the block from the reference counting scheme, so that we can
  // delete it freely later.
  for (auto *Block : L->blocks())
    Block->dropAllReferences();

  if (LI) {
    // Erase the instructions and the blocks without having to worry
    // about ordering because we already dropped the references.
    // NOTE: This iteration is safe because erasing the block does not remove
    // its entry from the loop's block list. We do that in the next section.
    for (Loop::block_iterator LpI = L->block_begin(), LpE = L->block_end();
         LpI != LpE; ++LpI)
      (*LpI)->eraseFromParent();

    // Finally, the blocks from loopinfo. This has to happen late because
    // otherwise our loop iterators won't work.
    SmallPtrSet<BasicBlock *, 8> blocks;
    blocks.insert(L->block_begin(), L->block_end());
    for (BasicBlock *BB : blocks)
      LI->removeBlock(BB);

    // The last step is to update LoopInfo now that we've eliminated this loop.
    LI->erase(L);
  }
}
  1212. /// Returns true if the instruction in a loop is guaranteed to execute at least
  1213. /// once.
  1214. bool llvm::isGuaranteedToExecute(const Instruction &Inst,
  1215. const DominatorTree *DT, const Loop *CurLoop,
  1216. const LoopSafetyInfo *SafetyInfo) {
  1217. // We have to check to make sure that the instruction dominates all
  1218. // of the exit blocks. If it doesn't, then there is a path out of the loop
  1219. // which does not execute this instruction, so we can't hoist it.
  1220. // If the instruction is in the header block for the loop (which is very
  1221. // common), it is always guaranteed to dominate the exit blocks. Since this
  1222. // is a common case, and can save some work, check it now.
  1223. if (Inst.getParent() == CurLoop->getHeader())
  1224. // If there's a throw in the header block, we can't guarantee we'll reach
  1225. // Inst.
  1226. return !SafetyInfo->HeaderMayThrow;
  1227. // Somewhere in this loop there is an instruction which may throw and make us
  1228. // exit the loop.
  1229. if (SafetyInfo->MayThrow)
  1230. return false;
  1231. // Get the exit blocks for the current loop.
  1232. SmallVector<BasicBlock *, 8> ExitBlocks;
  1233. CurLoop->getExitBlocks(ExitBlocks);
  1234. // Verify that the block dominates each of the exit blocks of the loop.
  1235. for (BasicBlock *ExitBlock : ExitBlocks)
  1236. if (!DT->dominates(Inst.getParent(), ExitBlock))
  1237. return false;
  1238. // As a degenerate case, if the loop is statically infinite then we haven't
  1239. // proven anything since there are no exit blocks.
  1240. if (ExitBlocks.empty())
  1241. return false;
  1242. // FIXME: In general, we have to prove that the loop isn't an infinite loop.
  1243. // See http::llvm.org/PR24078 . (The "ExitBlocks.empty()" check above is
  1244. // just a special case of this.)
  1245. return true;
  1246. }
  1247. Optional<unsigned> llvm::getLoopEstimatedTripCount(Loop *L) {
  1248. // Only support loops with a unique exiting block, and a latch.
  1249. if (!L->getExitingBlock())
  1250. return None;
  1251. // Get the branch weights for the the loop's backedge.
  1252. BranchInst *LatchBR =
  1253. dyn_cast<BranchInst>(L->getLoopLatch()->getTerminator());
  1254. if (!LatchBR || LatchBR->getNumSuccessors() != 2)
  1255. return None;
  1256. assert((LatchBR->getSuccessor(0) == L->getHeader() ||
  1257. LatchBR->getSuccessor(1) == L->getHeader()) &&
  1258. "At least one edge out of the latch must go to the header");
  1259. // To estimate the number of times the loop body was executed, we want to
  1260. // know the number of times the backedge was taken, vs. the number of times
  1261. // we exited the loop.
  1262. uint64_t TrueVal, FalseVal;
  1263. if (!LatchBR->extractProfMetadata(TrueVal, FalseVal))
  1264. return None;
  1265. if (!TrueVal || !FalseVal)
  1266. return 0;
  1267. // Divide the count of the backedge by the count of the edge exiting the loop,
  1268. // rounding to nearest.
  1269. if (LatchBR->getSuccessor(0) == L->getHeader())
  1270. return (TrueVal + (FalseVal / 2)) / FalseVal;
  1271. else
  1272. return (FalseVal + (TrueVal / 2)) / TrueVal;
  1273. }
  1274. /// \brief Adds a 'fast' flag to floating point operations.
  1275. static Value *addFastMathFlag(Value *V) {
  1276. if (isa<FPMathOperator>(V)) {
  1277. FastMathFlags Flags;
  1278. Flags.setFast();
  1279. cast<Instruction>(V)->setFastMathFlags(Flags);
  1280. }
  1281. return V;
  1282. }
  1283. // Helper to generate a log2 shuffle reduction.
  1284. Value *
  1285. llvm::getShuffleReduction(IRBuilder<> &Builder, Value *Src, unsigned Op,
  1286. RecurrenceDescriptor::MinMaxRecurrenceKind MinMaxKind,
  1287. ArrayRef<Value *> RedOps) {
  1288. unsigned VF = Src->getType()->getVectorNumElements();
  1289. // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
  1290. // and vector ops, reducing the set of values being computed by half each
  1291. // round.
  1292. assert(isPowerOf2_32(VF) &&
  1293. "Reduction emission only supported for pow2 vectors!");
  1294. Value *TmpVec = Src;
  1295. SmallVector<Constant *, 32> ShuffleMask(VF, nullptr);
  1296. for (unsigned i = VF; i != 1; i >>= 1) {
  1297. // Move the upper half of the vector to the lower half.
  1298. for (unsigned j = 0; j != i / 2; ++j)
  1299. ShuffleMask[j] = Builder.getInt32(i / 2 + j);
  1300. // Fill the rest of the mask with undef.
  1301. std::fill(&ShuffleMask[i / 2], ShuffleMask.end(),
  1302. UndefValue::get(Builder.getInt32Ty()));
  1303. Value *Shuf = Builder.CreateShuffleVector(
  1304. TmpVec, UndefValue::get(TmpVec->getType()),
  1305. ConstantVector::get(ShuffleMask), "rdx.shuf");
  1306. if (Op != Instruction::ICmp && Op != Instruction::FCmp) {
  1307. // Floating point operations had to be 'fast' to enable the reduction.
  1308. TmpVec = addFastMathFlag(Builder.CreateBinOp((Instruction::BinaryOps)Op,
  1309. TmpVec, Shuf, "bin.rdx"));
  1310. } else {
  1311. assert(MinMaxKind != RecurrenceDescriptor::MRK_Invalid &&
  1312. "Invalid min/max");
  1313. TmpVec = RecurrenceDescriptor::createMinMaxOp(Builder, MinMaxKind, TmpVec,
  1314. Shuf);
  1315. }
  1316. if (!RedOps.empty())
  1317. propagateIRFlags(TmpVec, RedOps);
  1318. }
  1319. // The result is in the first element of the vector.
  1320. return Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
  1321. }
  1322. /// Create a simple vector reduction specified by an opcode and some
  1323. /// flags (if generating min/max reductions).
  1324. Value *llvm::createSimpleTargetReduction(
  1325. IRBuilder<> &Builder, const TargetTransformInfo *TTI, unsigned Opcode,
  1326. Value *Src, TargetTransformInfo::ReductionFlags Flags,
  1327. ArrayRef<Value *> RedOps) {
  1328. assert(isa<VectorType>(Src->getType()) && "Type must be a vector");
  1329. Value *ScalarUdf = UndefValue::get(Src->getType()->getVectorElementType());
  1330. std::function<Value*()> BuildFunc;
  1331. using RD = RecurrenceDescriptor;
  1332. RD::MinMaxRecurrenceKind MinMaxKind = RD::MRK_Invalid;
  1333. // TODO: Support creating ordered reductions.
  1334. FastMathFlags FMFFast;
  1335. FMFFast.setFast();
  1336. switch (Opcode) {
  1337. case Instruction::Add:
  1338. BuildFunc = [&]() { return Builder.CreateAddReduce(Src); };
  1339. break;
  1340. case Instruction::Mul:
  1341. BuildFunc = [&]() { return Builder.CreateMulReduce(Src); };
  1342. break;
  1343. case Instruction::And:
  1344. BuildFunc = [&]() { return Builder.CreateAndReduce(Src); };
  1345. break;
  1346. case Instruction::Or:
  1347. BuildFunc = [&]() { return Builder.CreateOrReduce(Src); };
  1348. break;
  1349. case Instruction::Xor:
  1350. BuildFunc = [&]() { return Builder.CreateXorReduce(Src); };
  1351. break;
  1352. case Instruction::FAdd:
  1353. BuildFunc = [&]() {
  1354. auto Rdx = Builder.CreateFAddReduce(ScalarUdf, Src);
  1355. cast<CallInst>(Rdx)->setFastMathFlags(FMFFast);
  1356. return Rdx;
  1357. };
  1358. break;
  1359. case Instruction::FMul:
  1360. BuildFunc = [&]() {
  1361. auto Rdx = Builder.CreateFMulReduce(ScalarUdf, Src);
  1362. cast<CallInst>(Rdx)->setFastMathFlags(FMFFast);
  1363. return Rdx;
  1364. };
  1365. break;
  1366. case Instruction::ICmp:
  1367. if (Flags.IsMaxOp) {
  1368. MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMax : RD::MRK_UIntMax;
  1369. BuildFunc = [&]() {
  1370. return Builder.CreateIntMaxReduce(Src, Flags.IsSigned);
  1371. };
  1372. } else {
  1373. MinMaxKind = Flags.IsSigned ? RD::MRK_SIntMin : RD::MRK_UIntMin;
  1374. BuildFunc = [&]() {
  1375. return Builder.CreateIntMinReduce(Src, Flags.IsSigned);
  1376. };
  1377. }
  1378. break;
  1379. case Instruction::FCmp:
  1380. if (Flags.IsMaxOp) {
  1381. MinMaxKind = RD::MRK_FloatMax;
  1382. BuildFunc = [&]() { return Builder.CreateFPMaxReduce(Src, Flags.NoNaN); };
  1383. } else {
  1384. MinMaxKind = RD::MRK_FloatMin;
  1385. BuildFunc = [&]() { return Builder.CreateFPMinReduce(Src, Flags.NoNaN); };
  1386. }
  1387. break;
  1388. default:
  1389. llvm_unreachable("Unhandled opcode");
  1390. break;
  1391. }
  1392. if (TTI->useReductionIntrinsic(Opcode, Src->getType(), Flags))
  1393. return BuildFunc();
  1394. return getShuffleReduction(Builder, Src, Opcode, MinMaxKind, RedOps);
  1395. }
  1396. /// Create a vector reduction using a given recurrence descriptor.
  1397. Value *llvm::createTargetReduction(IRBuilder<> &B,
  1398. const TargetTransformInfo *TTI,
  1399. RecurrenceDescriptor &Desc, Value *Src,
  1400. bool NoNaN) {
  1401. // TODO: Support in-order reductions based on the recurrence descriptor.
  1402. using RD = RecurrenceDescriptor;
  1403. RD::RecurrenceKind RecKind = Desc.getRecurrenceKind();
  1404. TargetTransformInfo::ReductionFlags Flags;
  1405. Flags.NoNaN = NoNaN;
  1406. switch (RecKind) {
  1407. case RD::RK_FloatAdd:
  1408. return createSimpleTargetReduction(B, TTI, Instruction::FAdd, Src, Flags);
  1409. case RD::RK_FloatMult:
  1410. return createSimpleTargetReduction(B, TTI, Instruction::FMul, Src, Flags);
  1411. case RD::RK_IntegerAdd:
  1412. return createSimpleTargetReduction(B, TTI, Instruction::Add, Src, Flags);
  1413. case RD::RK_IntegerMult:
  1414. return createSimpleTargetReduction(B, TTI, Instruction::Mul, Src, Flags);
  1415. case RD::RK_IntegerAnd:
  1416. return createSimpleTargetReduction(B, TTI, Instruction::And, Src, Flags);
  1417. case RD::RK_IntegerOr:
  1418. return createSimpleTargetReduction(B, TTI, Instruction::Or, Src, Flags);
  1419. case RD::RK_IntegerXor:
  1420. return createSimpleTargetReduction(B, TTI, Instruction::Xor, Src, Flags);
  1421. case RD::RK_IntegerMinMax: {
  1422. RD::MinMaxRecurrenceKind MMKind = Desc.getMinMaxRecurrenceKind();
  1423. Flags.IsMaxOp = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_UIntMax);
  1424. Flags.IsSigned = (MMKind == RD::MRK_SIntMax || MMKind == RD::MRK_SIntMin);
  1425. return createSimpleTargetReduction(B, TTI, Instruction::ICmp, Src, Flags);
  1426. }
  1427. case RD::RK_FloatMinMax: {
  1428. Flags.IsMaxOp = Desc.getMinMaxRecurrenceKind() == RD::MRK_FloatMax;
  1429. return createSimpleTargetReduction(B, TTI, Instruction::FCmp, Src, Flags);
  1430. }
  1431. default:
  1432. llvm_unreachable("Unhandled RecKind");
  1433. }
  1434. }
  1435. void llvm::propagateIRFlags(Value *I, ArrayRef<Value *> VL, Value *OpValue) {
  1436. auto *VecOp = dyn_cast<Instruction>(I);
  1437. if (!VecOp)
  1438. return;
  1439. auto *Intersection = (OpValue == nullptr) ? dyn_cast<Instruction>(VL[0])
  1440. : dyn_cast<Instruction>(OpValue);
  1441. if (!Intersection)
  1442. return;
  1443. const unsigned Opcode = Intersection->getOpcode();
  1444. VecOp->copyIRFlags(Intersection);
  1445. for (auto *V : VL) {
  1446. auto *Instr = dyn_cast<Instruction>(V);
  1447. if (!Instr)
  1448. continue;
  1449. if (OpValue == nullptr || Opcode == Instr->getOpcode())
  1450. VecOp->andIRFlags(V);
  1451. }
  1452. }