InstCombineVectorOps.cpp 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049
  1. //===- InstCombineVectorOps.cpp -------------------------------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file implements instcombine for ExtractElement, InsertElement and
  11. // ShuffleVector.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "InstCombine.h"
  15. #include "llvm/Support/PatternMatch.h"
  16. using namespace llvm;
  17. using namespace PatternMatch;
  18. /// CheapToScalarize - Return true if the value is cheaper to scalarize than it
  19. /// is to leave as a vector operation. isConstant indicates whether we're
  20. /// extracting one known element. If false we're extracting a variable index.
  21. static bool CheapToScalarize(Value *V, bool isConstant) {
  22. if (Constant *C = dyn_cast<Constant>(V)) {
  23. if (isConstant) return true;
  24. // If all elts are the same, we can extract it and use any of the values.
  25. Constant *Op0 = C->getAggregateElement(0U);
  26. for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e; ++i)
  27. if (C->getAggregateElement(i) != Op0)
  28. return false;
  29. return true;
  30. }
  31. Instruction *I = dyn_cast<Instruction>(V);
  32. if (!I) return false;
  33. // Insert element gets simplified to the inserted element or is deleted if
  34. // this is constant idx extract element and its a constant idx insertelt.
  35. if (I->getOpcode() == Instruction::InsertElement && isConstant &&
  36. isa<ConstantInt>(I->getOperand(2)))
  37. return true;
  38. if (I->getOpcode() == Instruction::Load && I->hasOneUse())
  39. return true;
  40. if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I))
  41. if (BO->hasOneUse() &&
  42. (CheapToScalarize(BO->getOperand(0), isConstant) ||
  43. CheapToScalarize(BO->getOperand(1), isConstant)))
  44. return true;
  45. if (CmpInst *CI = dyn_cast<CmpInst>(I))
  46. if (CI->hasOneUse() &&
  47. (CheapToScalarize(CI->getOperand(0), isConstant) ||
  48. CheapToScalarize(CI->getOperand(1), isConstant)))
  49. return true;
  50. return false;
  51. }
  52. /// FindScalarElement - Given a vector and an element number, see if the scalar
  53. /// value is already around as a register, for example if it were inserted then
  54. /// extracted from the vector.
  55. static Value *FindScalarElement(Value *V, unsigned EltNo) {
  56. assert(V->getType()->isVectorTy() && "Not looking at a vector?");
  57. VectorType *VTy = cast<VectorType>(V->getType());
  58. unsigned Width = VTy->getNumElements();
  59. if (EltNo >= Width) // Out of range access.
  60. return UndefValue::get(VTy->getElementType());
  61. if (Constant *C = dyn_cast<Constant>(V))
  62. return C->getAggregateElement(EltNo);
  63. if (InsertElementInst *III = dyn_cast<InsertElementInst>(V)) {
  64. // If this is an insert to a variable element, we don't know what it is.
  65. if (!isa<ConstantInt>(III->getOperand(2)))
  66. return 0;
  67. unsigned IIElt = cast<ConstantInt>(III->getOperand(2))->getZExtValue();
  68. // If this is an insert to the element we are looking for, return the
  69. // inserted value.
  70. if (EltNo == IIElt)
  71. return III->getOperand(1);
  72. // Otherwise, the insertelement doesn't modify the value, recurse on its
  73. // vector input.
  74. return FindScalarElement(III->getOperand(0), EltNo);
  75. }
  76. if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(V)) {
  77. unsigned LHSWidth = SVI->getOperand(0)->getType()->getVectorNumElements();
  78. int InEl = SVI->getMaskValue(EltNo);
  79. if (InEl < 0)
  80. return UndefValue::get(VTy->getElementType());
  81. if (InEl < (int)LHSWidth)
  82. return FindScalarElement(SVI->getOperand(0), InEl);
  83. return FindScalarElement(SVI->getOperand(1), InEl - LHSWidth);
  84. }
  85. // Extract a value from a vector add operation with a constant zero.
  86. Value *Val = 0; Constant *Con = 0;
  87. if (match(V, m_Add(m_Value(Val), m_Constant(Con)))) {
  88. if (Con->getAggregateElement(EltNo)->isNullValue())
  89. return FindScalarElement(Val, EltNo);
  90. }
  91. // Otherwise, we don't know.
  92. return 0;
  93. }
  94. // If we have a PHI node with a vector type that has only 2 uses: feed
  95. // itself and be an operand of extractelement at a constant location,
  96. // try to replace the PHI of the vector type with a PHI of a scalar type.
  97. Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) {
  98. // Verify that the PHI node has exactly 2 uses. Otherwise return NULL.
  99. if (!PN->hasNUses(2))
  100. return NULL;
  101. // If so, it's known at this point that one operand is PHI and the other is
  102. // an extractelement node. Find the PHI user that is not the extractelement
  103. // node.
  104. Value::use_iterator iu = PN->use_begin();
  105. Instruction *PHIUser = dyn_cast<Instruction>(*iu);
  106. if (PHIUser == cast<Instruction>(&EI))
  107. PHIUser = cast<Instruction>(*(++iu));
  108. // Verify that this PHI user has one use, which is the PHI itself,
  109. // and that it is a binary operation which is cheap to scalarize.
  110. // otherwise return NULL.
  111. if (!PHIUser->hasOneUse() || !(PHIUser->use_back() == PN) ||
  112. !(isa<BinaryOperator>(PHIUser)) || !CheapToScalarize(PHIUser, true))
  113. return NULL;
  114. // Create a scalar PHI node that will replace the vector PHI node
  115. // just before the current PHI node.
  116. PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith(
  117. PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN));
  118. // Scalarize each PHI operand.
  119. for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) {
  120. Value *PHIInVal = PN->getIncomingValue(i);
  121. BasicBlock *inBB = PN->getIncomingBlock(i);
  122. Value *Elt = EI.getIndexOperand();
  123. // If the operand is the PHI induction variable:
  124. if (PHIInVal == PHIUser) {
  125. // Scalarize the binary operation. Its first operand is the
  126. // scalar PHI and the second operand is extracted from the other
  127. // vector operand.
  128. BinaryOperator *B0 = cast<BinaryOperator>(PHIUser);
  129. unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0;
  130. Value *Op = InsertNewInstWith(
  131. ExtractElementInst::Create(B0->getOperand(opId), Elt,
  132. B0->getOperand(opId)->getName() + ".Elt"),
  133. *B0);
  134. Value *newPHIUser = InsertNewInstWith(
  135. BinaryOperator::Create(B0->getOpcode(), scalarPHI, Op), *B0);
  136. scalarPHI->addIncoming(newPHIUser, inBB);
  137. } else {
  138. // Scalarize PHI input:
  139. Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, "");
  140. // Insert the new instruction into the predecessor basic block.
  141. Instruction *pos = dyn_cast<Instruction>(PHIInVal);
  142. BasicBlock::iterator InsertPos;
  143. if (pos && !isa<PHINode>(pos)) {
  144. InsertPos = pos;
  145. ++InsertPos;
  146. } else {
  147. InsertPos = inBB->getFirstInsertionPt();
  148. }
  149. InsertNewInstWith(newEI, *InsertPos);
  150. scalarPHI->addIncoming(newEI, inBB);
  151. }
  152. }
  153. return ReplaceInstUsesWith(EI, scalarPHI);
  154. }
  155. Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) {
  156. // If vector val is constant with all elements the same, replace EI with
  157. // that element. We handle a known element # below.
  158. if (Constant *C = dyn_cast<Constant>(EI.getOperand(0)))
  159. if (CheapToScalarize(C, false))
  160. return ReplaceInstUsesWith(EI, C->getAggregateElement(0U));
  161. // If extracting a specified index from the vector, see if we can recursively
  162. // find a previously computed scalar that was inserted into the vector.
  163. if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) {
  164. unsigned IndexVal = IdxC->getZExtValue();
  165. unsigned VectorWidth = EI.getVectorOperandType()->getNumElements();
  166. // If this is extracting an invalid index, turn this into undef, to avoid
  167. // crashing the code below.
  168. if (IndexVal >= VectorWidth)
  169. return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
  170. // This instruction only demands the single element from the input vector.
  171. // If the input vector has a single use, simplify it based on this use
  172. // property.
  173. if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) {
  174. APInt UndefElts(VectorWidth, 0);
  175. APInt DemandedMask(VectorWidth, 0);
  176. DemandedMask.setBit(IndexVal);
  177. if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0),
  178. DemandedMask, UndefElts)) {
  179. EI.setOperand(0, V);
  180. return &EI;
  181. }
  182. }
  183. if (Value *Elt = FindScalarElement(EI.getOperand(0), IndexVal))
  184. return ReplaceInstUsesWith(EI, Elt);
  185. // If the this extractelement is directly using a bitcast from a vector of
  186. // the same number of elements, see if we can find the source element from
  187. // it. In this case, we will end up needing to bitcast the scalars.
  188. if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) {
  189. if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType()))
  190. if (VT->getNumElements() == VectorWidth)
  191. if (Value *Elt = FindScalarElement(BCI->getOperand(0), IndexVal))
  192. return new BitCastInst(Elt, EI.getType());
  193. }
  194. // If there's a vector PHI feeding a scalar use through this extractelement
  195. // instruction, try to scalarize the PHI.
  196. if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) {
  197. Instruction *scalarPHI = scalarizePHI(EI, PN);
  198. if (scalarPHI)
  199. return scalarPHI;
  200. }
  201. }
  202. if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) {
  203. // Push extractelement into predecessor operation if legal and
  204. // profitable to do so
  205. if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) {
  206. if (I->hasOneUse() &&
  207. CheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) {
  208. Value *newEI0 =
  209. Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1),
  210. EI.getName()+".lhs");
  211. Value *newEI1 =
  212. Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1),
  213. EI.getName()+".rhs");
  214. return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1);
  215. }
  216. } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) {
  217. // Extracting the inserted element?
  218. if (IE->getOperand(2) == EI.getOperand(1))
  219. return ReplaceInstUsesWith(EI, IE->getOperand(1));
  220. // If the inserted and extracted elements are constants, they must not
  221. // be the same value, extract from the pre-inserted value instead.
  222. if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) {
  223. Worklist.AddValue(EI.getOperand(0));
  224. EI.setOperand(0, IE->getOperand(0));
  225. return &EI;
  226. }
  227. } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) {
  228. // If this is extracting an element from a shufflevector, figure out where
  229. // it came from and extract from the appropriate input element instead.
  230. if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) {
  231. int SrcIdx = SVI->getMaskValue(Elt->getZExtValue());
  232. Value *Src;
  233. unsigned LHSWidth =
  234. SVI->getOperand(0)->getType()->getVectorNumElements();
  235. if (SrcIdx < 0)
  236. return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType()));
  237. if (SrcIdx < (int)LHSWidth)
  238. Src = SVI->getOperand(0);
  239. else {
  240. SrcIdx -= LHSWidth;
  241. Src = SVI->getOperand(1);
  242. }
  243. Type *Int32Ty = Type::getInt32Ty(EI.getContext());
  244. return ExtractElementInst::Create(Src,
  245. ConstantInt::get(Int32Ty,
  246. SrcIdx, false));
  247. }
  248. } else if (CastInst *CI = dyn_cast<CastInst>(I)) {
  249. // Canonicalize extractelement(cast) -> cast(extractelement)
  250. // bitcasts can change the number of vector elements and they cost nothing
  251. if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) {
  252. Value *EE = Builder->CreateExtractElement(CI->getOperand(0),
  253. EI.getIndexOperand());
  254. Worklist.AddValue(EE);
  255. return CastInst::Create(CI->getOpcode(), EE, EI.getType());
  256. }
  257. } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
  258. if (SI->hasOneUse()) {
  259. // TODO: For a select on vectors, it might be useful to do this if it
  260. // has multiple extractelement uses. For vector select, that seems to
  261. // fight the vectorizer.
  262. // If we are extracting an element from a vector select or a select on
  263. // vectors, a select on the scalars extracted from the vector arguments.
  264. Value *TrueVal = SI->getTrueValue();
  265. Value *FalseVal = SI->getFalseValue();
  266. Value *Cond = SI->getCondition();
  267. if (Cond->getType()->isVectorTy()) {
  268. Cond = Builder->CreateExtractElement(Cond,
  269. EI.getIndexOperand(),
  270. Cond->getName() + ".elt");
  271. }
  272. Value *V1Elem
  273. = Builder->CreateExtractElement(TrueVal,
  274. EI.getIndexOperand(),
  275. TrueVal->getName() + ".elt");
  276. Value *V2Elem
  277. = Builder->CreateExtractElement(FalseVal,
  278. EI.getIndexOperand(),
  279. FalseVal->getName() + ".elt");
  280. return SelectInst::Create(Cond,
  281. V1Elem,
  282. V2Elem,
  283. SI->getName() + ".elt");
  284. }
  285. }
  286. }
  287. return 0;
  288. }
  289. /// CollectSingleShuffleElements - If V is a shuffle of values that ONLY returns
  290. /// elements from either LHS or RHS, return the shuffle mask and true.
  291. /// Otherwise, return false.
  292. static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
  293. SmallVectorImpl<Constant*> &Mask) {
  294. assert(V->getType() == LHS->getType() && V->getType() == RHS->getType() &&
  295. "Invalid CollectSingleShuffleElements");
  296. unsigned NumElts = V->getType()->getVectorNumElements();
  297. if (isa<UndefValue>(V)) {
  298. Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
  299. return true;
  300. }
  301. if (V == LHS) {
  302. for (unsigned i = 0; i != NumElts; ++i)
  303. Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
  304. return true;
  305. }
  306. if (V == RHS) {
  307. for (unsigned i = 0; i != NumElts; ++i)
  308. Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()),
  309. i+NumElts));
  310. return true;
  311. }
  312. if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
  313. // If this is an insert of an extract from some other vector, include it.
  314. Value *VecOp = IEI->getOperand(0);
  315. Value *ScalarOp = IEI->getOperand(1);
  316. Value *IdxOp = IEI->getOperand(2);
  317. if (!isa<ConstantInt>(IdxOp))
  318. return false;
  319. unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
  320. if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector.
  321. // Okay, we can handle this if the vector we are insertinting into is
  322. // transitively ok.
  323. if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
  324. // If so, update the mask to reflect the inserted undef.
  325. Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext()));
  326. return true;
  327. }
  328. } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){
  329. if (isa<ConstantInt>(EI->getOperand(1)) &&
  330. EI->getOperand(0)->getType() == V->getType()) {
  331. unsigned ExtractedIdx =
  332. cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
  333. // This must be extracting from either LHS or RHS.
  334. if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) {
  335. // Okay, we can handle this if the vector we are insertinting into is
  336. // transitively ok.
  337. if (CollectSingleShuffleElements(VecOp, LHS, RHS, Mask)) {
  338. // If so, update the mask to reflect the inserted value.
  339. if (EI->getOperand(0) == LHS) {
  340. Mask[InsertedIdx % NumElts] =
  341. ConstantInt::get(Type::getInt32Ty(V->getContext()),
  342. ExtractedIdx);
  343. } else {
  344. assert(EI->getOperand(0) == RHS);
  345. Mask[InsertedIdx % NumElts] =
  346. ConstantInt::get(Type::getInt32Ty(V->getContext()),
  347. ExtractedIdx+NumElts);
  348. }
  349. return true;
  350. }
  351. }
  352. }
  353. }
  354. }
  355. // TODO: Handle shufflevector here!
  356. return false;
  357. }
  358. /// CollectShuffleElements - We are building a shuffle of V, using RHS as the
  359. /// RHS of the shuffle instruction, if it is not null. Return a shuffle mask
  360. /// that computes V and the LHS value of the shuffle.
  361. static Value *CollectShuffleElements(Value *V, SmallVectorImpl<Constant*> &Mask,
  362. Value *&RHS) {
  363. assert(V->getType()->isVectorTy() &&
  364. (RHS == 0 || V->getType() == RHS->getType()) &&
  365. "Invalid shuffle!");
  366. unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
  367. if (isa<UndefValue>(V)) {
  368. Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext())));
  369. return V;
  370. }
  371. if (isa<ConstantAggregateZero>(V)) {
  372. Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0));
  373. return V;
  374. }
  375. if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) {
  376. // If this is an insert of an extract from some other vector, include it.
  377. Value *VecOp = IEI->getOperand(0);
  378. Value *ScalarOp = IEI->getOperand(1);
  379. Value *IdxOp = IEI->getOperand(2);
  380. if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
  381. if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
  382. EI->getOperand(0)->getType() == V->getType()) {
  383. unsigned ExtractedIdx =
  384. cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
  385. unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
  386. // Either the extracted from or inserted into vector must be RHSVec,
  387. // otherwise we'd end up with a shuffle of three inputs.
  388. if (EI->getOperand(0) == RHS || RHS == 0) {
  389. RHS = EI->getOperand(0);
  390. Value *V = CollectShuffleElements(VecOp, Mask, RHS);
  391. Mask[InsertedIdx % NumElts] =
  392. ConstantInt::get(Type::getInt32Ty(V->getContext()),
  393. NumElts+ExtractedIdx);
  394. return V;
  395. }
  396. if (VecOp == RHS) {
  397. Value *V = CollectShuffleElements(EI->getOperand(0), Mask, RHS);
  398. // Update Mask to reflect that `ScalarOp' has been inserted at
  399. // position `InsertedIdx' within the vector returned by IEI.
  400. Mask[InsertedIdx % NumElts] = Mask[ExtractedIdx];
  401. // Everything but the extracted element is replaced with the RHS.
  402. for (unsigned i = 0; i != NumElts; ++i) {
  403. if (i != InsertedIdx)
  404. Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()),
  405. NumElts+i);
  406. }
  407. return V;
  408. }
  409. // If this insertelement is a chain that comes from exactly these two
  410. // vectors, return the vector and the effective shuffle.
  411. if (CollectSingleShuffleElements(IEI, EI->getOperand(0), RHS, Mask))
  412. return EI->getOperand(0);
  413. }
  414. }
  415. }
  416. // TODO: Handle shufflevector here!
  417. // Otherwise, can't do anything fancy. Return an identity vector.
  418. for (unsigned i = 0; i != NumElts; ++i)
  419. Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i));
  420. return V;
  421. }
  422. Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) {
  423. Value *VecOp = IE.getOperand(0);
  424. Value *ScalarOp = IE.getOperand(1);
  425. Value *IdxOp = IE.getOperand(2);
  426. // Inserting an undef or into an undefined place, remove this.
  427. if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp))
  428. ReplaceInstUsesWith(IE, VecOp);
  429. // If the inserted element was extracted from some other vector, and if the
  430. // indexes are constant, try to turn this into a shufflevector operation.
  431. if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) {
  432. if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp) &&
  433. EI->getOperand(0)->getType() == IE.getType()) {
  434. unsigned NumVectorElts = IE.getType()->getNumElements();
  435. unsigned ExtractedIdx =
  436. cast<ConstantInt>(EI->getOperand(1))->getZExtValue();
  437. unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue();
  438. if (ExtractedIdx >= NumVectorElts) // Out of range extract.
  439. return ReplaceInstUsesWith(IE, VecOp);
  440. if (InsertedIdx >= NumVectorElts) // Out of range insert.
  441. return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType()));
  442. // If we are extracting a value from a vector, then inserting it right
  443. // back into the same place, just use the input vector.
  444. if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx)
  445. return ReplaceInstUsesWith(IE, VecOp);
  446. // If this insertelement isn't used by some other insertelement, turn it
  447. // (and any insertelements it points to), into one big shuffle.
  448. if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.use_back())) {
  449. SmallVector<Constant*, 16> Mask;
  450. Value *RHS = 0;
  451. Value *LHS = CollectShuffleElements(&IE, Mask, RHS);
  452. if (RHS == 0) RHS = UndefValue::get(LHS->getType());
  453. // We now have a shuffle of LHS, RHS, Mask.
  454. return new ShuffleVectorInst(LHS, RHS, ConstantVector::get(Mask));
  455. }
  456. }
  457. }
  458. unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements();
  459. APInt UndefElts(VWidth, 0);
  460. APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
  461. if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) {
  462. if (V != &IE)
  463. return ReplaceInstUsesWith(IE, V);
  464. return &IE;
  465. }
  466. return 0;
  467. }
  468. /// Return true if we can evaluate the specified expression tree if the vector
  469. /// elements were shuffled in a different order.
  470. static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask,
  471. unsigned Depth = 5) {
  472. // We can always reorder the elements of a constant.
  473. if (isa<Constant>(V))
  474. return true;
  475. // We won't reorder vector arguments. No IPO here.
  476. Instruction *I = dyn_cast<Instruction>(V);
  477. if (!I) return false;
  478. // Two users may expect different orders of the elements. Don't try it.
  479. if (!I->hasOneUse())
  480. return false;
  481. if (Depth == 0) return false;
  482. switch (I->getOpcode()) {
  483. case Instruction::Add:
  484. case Instruction::FAdd:
  485. case Instruction::Sub:
  486. case Instruction::FSub:
  487. case Instruction::Mul:
  488. case Instruction::FMul:
  489. case Instruction::UDiv:
  490. case Instruction::SDiv:
  491. case Instruction::FDiv:
  492. case Instruction::URem:
  493. case Instruction::SRem:
  494. case Instruction::FRem:
  495. case Instruction::Shl:
  496. case Instruction::LShr:
  497. case Instruction::AShr:
  498. case Instruction::And:
  499. case Instruction::Or:
  500. case Instruction::Xor:
  501. case Instruction::ICmp:
  502. case Instruction::FCmp:
  503. case Instruction::Trunc:
  504. case Instruction::ZExt:
  505. case Instruction::SExt:
  506. case Instruction::FPToUI:
  507. case Instruction::FPToSI:
  508. case Instruction::UIToFP:
  509. case Instruction::SIToFP:
  510. case Instruction::FPTrunc:
  511. case Instruction::FPExt:
  512. case Instruction::GetElementPtr: {
  513. for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
  514. if (!CanEvaluateShuffled(I->getOperand(i), Mask, Depth-1))
  515. return false;
  516. }
  517. return true;
  518. }
  519. case Instruction::InsertElement: {
  520. ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2));
  521. if (!CI) return false;
  522. int ElementNumber = CI->getLimitedValue();
  523. // Verify that 'CI' does not occur twice in Mask. A single 'insertelement'
  524. // can't put an element into multiple indices.
  525. bool SeenOnce = false;
  526. for (int i = 0, e = Mask.size(); i != e; ++i) {
  527. if (Mask[i] == ElementNumber) {
  528. if (SeenOnce)
  529. return false;
  530. SeenOnce = true;
  531. }
  532. }
  533. return CanEvaluateShuffled(I->getOperand(0), Mask, Depth-1);
  534. }
  535. }
  536. return false;
  537. }
  538. /// Rebuild a new instruction just like 'I' but with the new operands given.
  539. /// In the event of type mismatch, the type of the operands is correct.
  540. static Value *BuildNew(Instruction *I, ArrayRef<Value*> NewOps) {
  541. // We don't want to use the IRBuilder here because we want the replacement
  542. // instructions to appear next to 'I', not the builder's insertion point.
  543. switch (I->getOpcode()) {
  544. case Instruction::Add:
  545. case Instruction::FAdd:
  546. case Instruction::Sub:
  547. case Instruction::FSub:
  548. case Instruction::Mul:
  549. case Instruction::FMul:
  550. case Instruction::UDiv:
  551. case Instruction::SDiv:
  552. case Instruction::FDiv:
  553. case Instruction::URem:
  554. case Instruction::SRem:
  555. case Instruction::FRem:
  556. case Instruction::Shl:
  557. case Instruction::LShr:
  558. case Instruction::AShr:
  559. case Instruction::And:
  560. case Instruction::Or:
  561. case Instruction::Xor: {
  562. BinaryOperator *BO = cast<BinaryOperator>(I);
  563. assert(NewOps.size() == 2 && "binary operator with #ops != 2");
  564. BinaryOperator *New =
  565. BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(),
  566. NewOps[0], NewOps[1], "", BO);
  567. if (isa<OverflowingBinaryOperator>(BO)) {
  568. New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap());
  569. New->setHasNoSignedWrap(BO->hasNoSignedWrap());
  570. }
  571. if (isa<PossiblyExactOperator>(BO)) {
  572. New->setIsExact(BO->isExact());
  573. }
  574. if (isa<FPMathOperator>(BO))
  575. New->copyFastMathFlags(I);
  576. return New;
  577. }
  578. case Instruction::ICmp:
  579. assert(NewOps.size() == 2 && "icmp with #ops != 2");
  580. return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(),
  581. NewOps[0], NewOps[1]);
  582. case Instruction::FCmp:
  583. assert(NewOps.size() == 2 && "fcmp with #ops != 2");
  584. return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(),
  585. NewOps[0], NewOps[1]);
  586. case Instruction::Trunc:
  587. case Instruction::ZExt:
  588. case Instruction::SExt:
  589. case Instruction::FPToUI:
  590. case Instruction::FPToSI:
  591. case Instruction::UIToFP:
  592. case Instruction::SIToFP:
  593. case Instruction::FPTrunc:
  594. case Instruction::FPExt: {
  595. // It's possible that the mask has a different number of elements from
  596. // the original cast. We recompute the destination type to match the mask.
  597. Type *DestTy =
  598. VectorType::get(I->getType()->getScalarType(),
  599. NewOps[0]->getType()->getVectorNumElements());
  600. assert(NewOps.size() == 1 && "cast with #ops != 1");
  601. return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy,
  602. "", I);
  603. }
  604. case Instruction::GetElementPtr: {
  605. Value *Ptr = NewOps[0];
  606. ArrayRef<Value*> Idx = NewOps.slice(1);
  607. GetElementPtrInst *GEP = GetElementPtrInst::Create(Ptr, Idx, "", I);
  608. GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds());
  609. return GEP;
  610. }
  611. }
  612. llvm_unreachable("failed to rebuild vector instructions");
  613. }
  614. Value *
  615. InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) {
  616. // Mask.size() does not need to be equal to the number of vector elements.
  617. assert(V->getType()->isVectorTy() && "can't reorder non-vector elements");
  618. if (isa<UndefValue>(V)) {
  619. return UndefValue::get(VectorType::get(V->getType()->getScalarType(),
  620. Mask.size()));
  621. }
  622. if (isa<ConstantAggregateZero>(V)) {
  623. return ConstantAggregateZero::get(
  624. VectorType::get(V->getType()->getScalarType(),
  625. Mask.size()));
  626. }
  627. if (Constant *C = dyn_cast<Constant>(V)) {
  628. SmallVector<Constant *, 16> MaskValues;
  629. for (int i = 0, e = Mask.size(); i != e; ++i) {
  630. if (Mask[i] == -1)
  631. MaskValues.push_back(UndefValue::get(Builder->getInt32Ty()));
  632. else
  633. MaskValues.push_back(Builder->getInt32(Mask[i]));
  634. }
  635. return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()),
  636. ConstantVector::get(MaskValues));
  637. }
  638. Instruction *I = cast<Instruction>(V);
  639. switch (I->getOpcode()) {
  640. case Instruction::Add:
  641. case Instruction::FAdd:
  642. case Instruction::Sub:
  643. case Instruction::FSub:
  644. case Instruction::Mul:
  645. case Instruction::FMul:
  646. case Instruction::UDiv:
  647. case Instruction::SDiv:
  648. case Instruction::FDiv:
  649. case Instruction::URem:
  650. case Instruction::SRem:
  651. case Instruction::FRem:
  652. case Instruction::Shl:
  653. case Instruction::LShr:
  654. case Instruction::AShr:
  655. case Instruction::And:
  656. case Instruction::Or:
  657. case Instruction::Xor:
  658. case Instruction::ICmp:
  659. case Instruction::FCmp:
  660. case Instruction::Trunc:
  661. case Instruction::ZExt:
  662. case Instruction::SExt:
  663. case Instruction::FPToUI:
  664. case Instruction::FPToSI:
  665. case Instruction::UIToFP:
  666. case Instruction::SIToFP:
  667. case Instruction::FPTrunc:
  668. case Instruction::FPExt:
  669. case Instruction::Select:
  670. case Instruction::GetElementPtr: {
  671. SmallVector<Value*, 8> NewOps;
  672. bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements());
  673. for (int i = 0, e = I->getNumOperands(); i != e; ++i) {
  674. Value *V = EvaluateInDifferentElementOrder(I->getOperand(i), Mask);
  675. NewOps.push_back(V);
  676. NeedsRebuild |= (V != I->getOperand(i));
  677. }
  678. if (NeedsRebuild) {
  679. return BuildNew(I, NewOps);
  680. }
  681. return I;
  682. }
  683. case Instruction::InsertElement: {
  684. int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue();
  685. // The insertelement was inserting at Element. Figure out which element
  686. // that becomes after shuffling. The answer is guaranteed to be unique
  687. // by CanEvaluateShuffled.
  688. bool Found = false;
  689. int Index = 0;
  690. for (int e = Mask.size(); Index != e; ++Index) {
  691. if (Mask[Index] == Element) {
  692. Found = true;
  693. break;
  694. }
  695. }
  696. // If element is not in Mask, no need to handle the operand 1 (element to
  697. // be inserted). Just evaluate values in operand 0 according to Mask.
  698. if (!Found)
  699. return EvaluateInDifferentElementOrder(I->getOperand(0), Mask);
  700. Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask);
  701. return InsertElementInst::Create(V, I->getOperand(1),
  702. Builder->getInt32(Index), "", I);
  703. }
  704. }
  705. llvm_unreachable("failed to reorder elements of vector instruction!");
  706. }
  707. Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
  708. Value *LHS = SVI.getOperand(0);
  709. Value *RHS = SVI.getOperand(1);
  710. SmallVector<int, 16> Mask = SVI.getShuffleMask();
  711. bool MadeChange = false;
  712. // Undefined shuffle mask -> undefined value.
  713. if (isa<UndefValue>(SVI.getOperand(2)))
  714. return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType()));
  715. unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements();
  716. APInt UndefElts(VWidth, 0);
  717. APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth));
  718. if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) {
  719. if (V != &SVI)
  720. return ReplaceInstUsesWith(SVI, V);
  721. LHS = SVI.getOperand(0);
  722. RHS = SVI.getOperand(1);
  723. MadeChange = true;
  724. }
  725. unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements();
  726. // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask')
  727. // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask').
  728. if (LHS == RHS || isa<UndefValue>(LHS)) {
  729. if (isa<UndefValue>(LHS) && LHS == RHS) {
  730. // shuffle(undef,undef,mask) -> undef.
  731. Value *Result = (VWidth == LHSWidth)
  732. ? LHS : UndefValue::get(SVI.getType());
  733. return ReplaceInstUsesWith(SVI, Result);
  734. }
  735. // Remap any references to RHS to use LHS.
  736. SmallVector<Constant*, 16> Elts;
  737. for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) {
  738. if (Mask[i] < 0) {
  739. Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
  740. continue;
  741. }
  742. if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) ||
  743. (Mask[i] < (int)e && isa<UndefValue>(LHS))) {
  744. Mask[i] = -1; // Turn into undef.
  745. Elts.push_back(UndefValue::get(Type::getInt32Ty(SVI.getContext())));
  746. } else {
  747. Mask[i] = Mask[i] % e; // Force to LHS.
  748. Elts.push_back(ConstantInt::get(Type::getInt32Ty(SVI.getContext()),
  749. Mask[i]));
  750. }
  751. }
  752. SVI.setOperand(0, SVI.getOperand(1));
  753. SVI.setOperand(1, UndefValue::get(RHS->getType()));
  754. SVI.setOperand(2, ConstantVector::get(Elts));
  755. LHS = SVI.getOperand(0);
  756. RHS = SVI.getOperand(1);
  757. MadeChange = true;
  758. }
  759. if (VWidth == LHSWidth) {
  760. // Analyze the shuffle, are the LHS or RHS and identity shuffles?
  761. bool isLHSID = true, isRHSID = true;
  762. for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
  763. if (Mask[i] < 0) continue; // Ignore undef values.
  764. // Is this an identity shuffle of the LHS value?
  765. isLHSID &= (Mask[i] == (int)i);
  766. // Is this an identity shuffle of the RHS value?
  767. isRHSID &= (Mask[i]-e == i);
  768. }
  769. // Eliminate identity shuffles.
  770. if (isLHSID) return ReplaceInstUsesWith(SVI, LHS);
  771. if (isRHSID) return ReplaceInstUsesWith(SVI, RHS);
  772. }
  773. if (isa<UndefValue>(RHS) && CanEvaluateShuffled(LHS, Mask)) {
  774. Value *V = EvaluateInDifferentElementOrder(LHS, Mask);
  775. return ReplaceInstUsesWith(SVI, V);
  776. }
  777. // If the LHS is a shufflevector itself, see if we can combine it with this
  778. // one without producing an unusual shuffle.
  779. // Cases that might be simplified:
  780. // 1.
  781. // x1=shuffle(v1,v2,mask1)
  782. // x=shuffle(x1,undef,mask)
  783. // ==>
  784. // x=shuffle(v1,undef,newMask)
  785. // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1
  786. // 2.
  787. // x1=shuffle(v1,undef,mask1)
  788. // x=shuffle(x1,x2,mask)
  789. // where v1.size() == mask1.size()
  790. // ==>
  791. // x=shuffle(v1,x2,newMask)
  792. // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i]
  793. // 3.
  794. // x2=shuffle(v2,undef,mask2)
  795. // x=shuffle(x1,x2,mask)
  796. // where v2.size() == mask2.size()
  797. // ==>
  798. // x=shuffle(x1,v2,newMask)
  799. // newMask[i] = (mask[i] < x1.size())
  800. // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size()
  801. // 4.
  802. // x1=shuffle(v1,undef,mask1)
  803. // x2=shuffle(v2,undef,mask2)
  804. // x=shuffle(x1,x2,mask)
  805. // where v1.size() == v2.size()
  806. // ==>
  807. // x=shuffle(v1,v2,newMask)
  808. // newMask[i] = (mask[i] < x1.size())
  809. // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size()
  810. //
  811. // Here we are really conservative:
  812. // we are absolutely afraid of producing a shuffle mask not in the input
  813. // program, because the code gen may not be smart enough to turn a merged
  814. // shuffle into two specific shuffles: it may produce worse code. As such,
  815. // we only merge two shuffles if the result is either a splat or one of the
  816. // input shuffle masks. In this case, merging the shuffles just removes
  817. // one instruction, which we know is safe. This is good for things like
  818. // turning: (splat(splat)) -> splat, or
  819. // merge(V[0..n], V[n+1..2n]) -> V[0..2n]
  820. ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS);
  821. ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS);
  822. if (LHSShuffle)
  823. if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS))
  824. LHSShuffle = NULL;
  825. if (RHSShuffle)
  826. if (!isa<UndefValue>(RHSShuffle->getOperand(1)))
  827. RHSShuffle = NULL;
  828. if (!LHSShuffle && !RHSShuffle)
  829. return MadeChange ? &SVI : 0;
  830. Value* LHSOp0 = NULL;
  831. Value* LHSOp1 = NULL;
  832. Value* RHSOp0 = NULL;
  833. unsigned LHSOp0Width = 0;
  834. unsigned RHSOp0Width = 0;
  835. if (LHSShuffle) {
  836. LHSOp0 = LHSShuffle->getOperand(0);
  837. LHSOp1 = LHSShuffle->getOperand(1);
  838. LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements();
  839. }
  840. if (RHSShuffle) {
  841. RHSOp0 = RHSShuffle->getOperand(0);
  842. RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements();
  843. }
  844. Value* newLHS = LHS;
  845. Value* newRHS = RHS;
  846. if (LHSShuffle) {
  847. // case 1
  848. if (isa<UndefValue>(RHS)) {
  849. newLHS = LHSOp0;
  850. newRHS = LHSOp1;
  851. }
  852. // case 2 or 4
  853. else if (LHSOp0Width == LHSWidth) {
  854. newLHS = LHSOp0;
  855. }
  856. }
  857. // case 3 or 4
  858. if (RHSShuffle && RHSOp0Width == LHSWidth) {
  859. newRHS = RHSOp0;
  860. }
  861. // case 4
  862. if (LHSOp0 == RHSOp0) {
  863. newLHS = LHSOp0;
  864. newRHS = NULL;
  865. }
  866. if (newLHS == LHS && newRHS == RHS)
  867. return MadeChange ? &SVI : 0;
  868. SmallVector<int, 16> LHSMask;
  869. SmallVector<int, 16> RHSMask;
  870. if (newLHS != LHS)
  871. LHSMask = LHSShuffle->getShuffleMask();
  872. if (RHSShuffle && newRHS != RHS)
  873. RHSMask = RHSShuffle->getShuffleMask();
  874. unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth;
  875. SmallVector<int, 16> newMask;
  876. bool isSplat = true;
  877. int SplatElt = -1;
  878. // Create a new mask for the new ShuffleVectorInst so that the new
  879. // ShuffleVectorInst is equivalent to the original one.
  880. for (unsigned i = 0; i < VWidth; ++i) {
  881. int eltMask;
  882. if (Mask[i] < 0) {
  883. // This element is an undef value.
  884. eltMask = -1;
  885. } else if (Mask[i] < (int)LHSWidth) {
  886. // This element is from left hand side vector operand.
  887. //
  888. // If LHS is going to be replaced (case 1, 2, or 4), calculate the
  889. // new mask value for the element.
  890. if (newLHS != LHS) {
  891. eltMask = LHSMask[Mask[i]];
  892. // If the value selected is an undef value, explicitly specify it
  893. // with a -1 mask value.
  894. if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1))
  895. eltMask = -1;
  896. } else
  897. eltMask = Mask[i];
  898. } else {
  899. // This element is from right hand side vector operand
  900. //
  901. // If the value selected is an undef value, explicitly specify it
  902. // with a -1 mask value. (case 1)
  903. if (isa<UndefValue>(RHS))
  904. eltMask = -1;
  905. // If RHS is going to be replaced (case 3 or 4), calculate the
  906. // new mask value for the element.
  907. else if (newRHS != RHS) {
  908. eltMask = RHSMask[Mask[i]-LHSWidth];
  909. // If the value selected is an undef value, explicitly specify it
  910. // with a -1 mask value.
  911. if (eltMask >= (int)RHSOp0Width) {
  912. assert(isa<UndefValue>(RHSShuffle->getOperand(1))
  913. && "should have been check above");
  914. eltMask = -1;
  915. }
  916. } else
  917. eltMask = Mask[i]-LHSWidth;
  918. // If LHS's width is changed, shift the mask value accordingly.
  919. // If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any
  920. // references from RHSOp0 to LHSOp0, so we don't need to shift the mask.
  921. // If newRHS == newLHS, we want to remap any references from newRHS to
  922. // newLHS so that we can properly identify splats that may occur due to
  923. // obfuscation across the two vectors.
  924. if (eltMask >= 0 && newRHS != NULL && newLHS != newRHS)
  925. eltMask += newLHSWidth;
  926. }
  927. // Check if this could still be a splat.
  928. if (eltMask >= 0) {
  929. if (SplatElt >= 0 && SplatElt != eltMask)
  930. isSplat = false;
  931. SplatElt = eltMask;
  932. }
  933. newMask.push_back(eltMask);
  934. }
  935. // If the result mask is equal to one of the original shuffle masks,
  936. // or is a splat, do the replacement.
  937. if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) {
  938. SmallVector<Constant*, 16> Elts;
  939. Type *Int32Ty = Type::getInt32Ty(SVI.getContext());
  940. for (unsigned i = 0, e = newMask.size(); i != e; ++i) {
  941. if (newMask[i] < 0) {
  942. Elts.push_back(UndefValue::get(Int32Ty));
  943. } else {
  944. Elts.push_back(ConstantInt::get(Int32Ty, newMask[i]));
  945. }
  946. }
  947. if (newRHS == NULL)
  948. newRHS = UndefValue::get(newLHS->getType());
  949. return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts));
  950. }
  951. return MadeChange ? &SVI : 0;
  952. }