10 年之前 · c57ee2b13e
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -217,9 +217,9 @@ static cl::opt<unsigned> MaxNestedScalarReductionIC(
 
				 namespace {
			
 
				 
			
 
				 // Forward declarations.
			
 
				+class LoopVectorizeHints;
			
 
				 class LoopVectorizationLegality;
			
 
				 class LoopVectorizationCostModel;
			
 
				-class LoopVectorizeHints;
			
 
				 class LoopVectorizationRequirements;
			
 
				 
			
 
				 /// \brief This modifies LoopAccessReport to initialize message with
			
@@ -779,680 +779,680 @@ private:
 
				       const ValueToValueMap &Strides);
			
 
				 };
			
 
				 
			
 
				-/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
			
 
				-/// to what vectorization factor.
			
 
				-/// This class does not look at the profitability of vectorization, only the
			
 
				-/// legality. This class has two main kinds of checks:
			
 
				-/// * Memory checks - The code in canVectorizeMemory checks if vectorization
			
 
				-///   will change the order of memory accesses in a way that will change the
			
 
				-///   correctness of the program.
			
 
				-/// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
			
 
				-/// checks for a number of different conditions, such as the availability of a
			
 
				-/// single induction variable, that all types are supported and vectorize-able,
			
 
				-/// etc. This code reflects the capabilities of InnerLoopVectorizer.
			
 
				-/// This class is also used by InnerLoopVectorizer for identifying
			
 
				-/// induction variable and the different reduction variables.
			
 
				-class LoopVectorizationLegality {
			
 
				-public:
			
 
				-  LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
			
 
				-                            TargetLibraryInfo *TLI, AliasAnalysis *AA,
			
 
				-                            Function *F, const TargetTransformInfo *TTI,
			
 
				-                            LoopAccessAnalysis *LAA,
			
 
				-                            LoopVectorizationRequirements *R)
			
 
				-      : NumPredStores(0), TheLoop(L), SE(SE), TLI(TLI), TheFunction(F),
			
 
				-        TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), InterleaveInfo(SE, L, DT),
			
 
				-        Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false),
			
 
				-        Requirements(R) {}
			
 
				-
			
 
				-  /// This enum represents the kinds of inductions that we support.
			
 
				-  enum InductionKind {
			
 
				-    IK_NoInduction,  ///< Not an induction variable.
			
 
				-    IK_IntInduction, ///< Integer induction variable. Step = C.
			
 
				-    IK_PtrInduction  ///< Pointer induction var. Step = C / sizeof(elem).
			
 
				+/// Utility class for getting and setting loop vectorizer hints in the form
			
 
				+/// of loop metadata.
			
 
				+/// This class keeps a number of loop annotations locally (as member variables)
			
 
				+/// and can, upon request, write them back as metadata on the loop. It will
			
 
				+/// initially scan the loop for existing metadata, and will update the local
			
 
				+/// values based on information in the loop.
			
 
				+/// We cannot write all values to metadata, as the mere presence of some info,
			
 
				+/// for example 'force', means a decision has been made. So, we need to be
			
 
				+/// careful NOT to add them if the user hasn't specifically asked so.
			
 
				+class LoopVectorizeHints {
			
 
				+  enum HintKind {
			
 
				+    HK_WIDTH,
			
 
				+    HK_UNROLL,
			
 
				+    HK_FORCE
			
 
				   };
			
 
				 
			
 
				-  /// A struct for saving information about induction variables.
			
 
				-  struct InductionInfo {
			
 
				-    InductionInfo(Value *Start, InductionKind K, ConstantInt *Step)
			
 
				-        : StartValue(Start), IK(K), StepValue(Step) {
			
 
				-      assert(IK != IK_NoInduction && "Not an induction");
			
 
				-      assert(StartValue && "StartValue is null");
			
 
				-      assert(StepValue && !StepValue->isZero() && "StepValue is zero");
			
 
				-      assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
			
 
				-             "StartValue is not a pointer for pointer induction");
			
 
				-      assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
			
 
				-             "StartValue is not an integer for integer induction");
			
 
				-      assert(StepValue->getType()->isIntegerTy() &&
			
 
				-             "StepValue is not an integer");
			
 
				-    }
			
 
				-    InductionInfo()
			
 
				-        : StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {}
			
 
				-
			
 
				-    /// Get the consecutive direction. Returns:
			
 
				-    ///   0 - unknown or non-consecutive.
			
 
				-    ///   1 - consecutive and increasing.
			
 
				-    ///  -1 - consecutive and decreasing.
			
 
				-    int getConsecutiveDirection() const {
			
 
				-      if (StepValue && (StepValue->isOne() || StepValue->isMinusOne()))
			
 
				-        return StepValue->getSExtValue();
			
 
				-      return 0;
			
 
				-    }
			
 
				-
			
 
				-    /// Compute the transformed value of Index at offset StartValue using step
			
 
				-    /// StepValue.
			
 
				-    /// For integer induction, returns StartValue + Index * StepValue.
			
 
				-    /// For pointer induction, returns StartValue[Index * StepValue].
			
 
				-    /// FIXME: The newly created binary instructions should contain nsw/nuw
			
 
				-    /// flags, which can be found from the original scalar operations.
			
 
				-    Value *transform(IRBuilder<> &B, Value *Index) const {
			
 
				-      switch (IK) {
			
 
				-      case IK_IntInduction:
			
 
				-        assert(Index->getType() == StartValue->getType() &&
			
 
				-               "Index type does not match StartValue type");
			
 
				-        if (StepValue->isMinusOne())
			
 
				-          return B.CreateSub(StartValue, Index);
			
 
				-        if (!StepValue->isOne())
			
 
				-          Index = B.CreateMul(Index, StepValue);
			
 
				-        return B.CreateAdd(StartValue, Index);
			
 
				+  /// Hint - associates name and validation with the hint value.
			
 
				+  struct Hint {
			
 
				+    const char * Name;
			
 
				+    unsigned Value; // This may have to change for non-numeric values.
			
 
				+    HintKind Kind;
			
 
				 
			
 
				-      case IK_PtrInduction:
			
 
				-        assert(Index->getType() == StepValue->getType() &&
			
 
				-               "Index type does not match StepValue type");
			
 
				-        if (StepValue->isMinusOne())
			
 
				-          Index = B.CreateNeg(Index);
			
 
				-        else if (!StepValue->isOne())
			
 
				-          Index = B.CreateMul(Index, StepValue);
			
 
				-        return B.CreateGEP(nullptr, StartValue, Index);
			
 
				+    Hint(const char * Name, unsigned Value, HintKind Kind)
			
 
				+      : Name(Name), Value(Value), Kind(Kind) { }
			
 
				 
			
 
				-      case IK_NoInduction:
			
 
				-        return nullptr;
			
 
				+    bool validate(unsigned Val) {
			
 
				+      switch (Kind) {
			
 
				+      case HK_WIDTH:
			
 
				+        return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
			
 
				+      case HK_UNROLL:
			
 
				+        return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
			
 
				+      case HK_FORCE:
			
 
				+        return (Val <= 1);
			
 
				       }
			
 
				-      llvm_unreachable("invalid enum");
			
 
				+      return false;
			
 
				     }
			
 
				-
			
 
				-    /// Start value.
			
 
				-    TrackingVH<Value> StartValue;
			
 
				-    /// Induction kind.
			
 
				-    InductionKind IK;
			
 
				-    /// Step value.
			
 
				-    ConstantInt *StepValue;
			
 
				   };
			
 
				 
			
 
				-  /// ReductionList contains the reduction descriptors for all
			
 
				-  /// of the reductions that were found in the loop.
			
 
				-  typedef DenseMap<PHINode *, RecurrenceDescriptor> ReductionList;
			
 
				-
			
 
				-  /// InductionList saves induction variables and maps them to the
			
 
				-  /// induction descriptor.
			
 
				-  typedef MapVector<PHINode*, InductionInfo> InductionList;
			
 
				-
			
 
				-  /// Returns true if it is legal to vectorize this loop.
			
 
				-  /// This does not mean that it is profitable to vectorize this
			
 
				-  /// loop, only that it is legal to do so.
			
 
				-  bool canVectorize();
			
 
				+  /// Vectorization width.
			
 
				+  Hint Width;
			
 
				+  /// Vectorization interleave factor.
			
 
				+  Hint Interleave;
			
 
				+  /// Vectorization forced
			
 
				+  Hint Force;
			
 
				 
			
 
				-  /// Returns the Induction variable.
			
 
				-  PHINode *getInduction() { return Induction; }
			
 
				+  /// Return the loop metadata prefix.
			
 
				+  static StringRef Prefix() { return "llvm.loop."; }
			
 
				 
			
 
				-  /// Returns the reduction variables found in the loop.
			
 
				-  ReductionList *getReductionVars() { return &Reductions; }
			
 
				+public:
			
 
				+  enum ForceKind {
			
 
				+    FK_Undefined = -1, ///< Not selected.
			
 
				+    FK_Disabled = 0,   ///< Forcing disabled.
			
 
				+    FK_Enabled = 1,    ///< Forcing enabled.
			
 
				+  };
			
 
				 
			
 
				-  /// Returns the induction variables found in the loop.
			
 
				-  InductionList *getInductionVars() { return &Inductions; }
			
 
				+  LoopVectorizeHints(const Loop *L, bool DisableInterleaving)
			
 
				+      : Width("vectorize.width", VectorizerParams::VectorizationFactor,
			
 
				+              HK_WIDTH),
			
 
				+        Interleave("interleave.count", DisableInterleaving, HK_UNROLL),
			
 
				+        Force("vectorize.enable", FK_Undefined, HK_FORCE),
			
 
				+        TheLoop(L) {
			
 
				+    // Populate values with existing loop metadata.
			
 
				+    getHintsFromMetadata();
			
 
				 
			
 
				-  /// Returns the widest induction type.
			
 
				-  Type *getWidestInductionType() { return WidestIndTy; }
			
 
				+    // force-vector-interleave overrides DisableInterleaving.
			
 
				+    if (VectorizerParams::isInterleaveForced())
			
 
				+      Interleave.Value = VectorizerParams::VectorizationInterleave;
			
 
				 
			
 
				-  /// Returns True if V is an induction variable in this loop.
			
 
				-  bool isInductionVariable(const Value *V);
			
 
				+    DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs()
			
 
				+          << "LV: Interleaving disabled by the pass manager\n");
			
 
				+  }
			
 
				 
			
 
				-  /// Return true if the block BB needs to be predicated in order for the loop
			
 
				-  /// to be vectorized.
			
 
				-  bool blockNeedsPredication(BasicBlock *BB);
			
 
				+  /// Mark the loop L as already vectorized by setting the width to 1.
			
 
				+  void setAlreadyVectorized() {
			
 
				+    Width.Value = Interleave.Value = 1;
			
 
				+    Hint Hints[] = {Width, Interleave};
			
 
				+    writeHintsToMetadata(Hints);
			
 
				+  }
			
 
				 
			
 
				-  /// Check if this  pointer is consecutive when vectorizing. This happens
			
 
				-  /// when the last index of the GEP is the induction variable, or that the
			
 
				-  /// pointer itself is an induction variable.
			
 
				-  /// This check allows us to vectorize A[idx] into a wide load/store.
			
 
				-  /// Returns:
			
 
				-  /// 0 - Stride is unknown or non-consecutive.
			
 
				-  /// 1 - Address is consecutive.
			
 
				-  /// -1 - Address is consecutive, and decreasing.
			
 
				-  int isConsecutivePtr(Value *Ptr);
			
 
				+  bool allowVectorization(Function *F, Loop *L, bool AlwaysVectorize) const {
			
 
				+    if (getForce() == LoopVectorizeHints::FK_Disabled) {
			
 
				+      DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
			
 
				+      emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F,
			
 
				+                                     L->getStartLoc(), emitRemark());
			
 
				+      return false;
			
 
				+    }
			
 
				 
			
 
				-  /// Returns true if the value V is uniform within the loop.
			
 
				-  bool isUniform(Value *V);
			
 
				+    if (!AlwaysVectorize && getForce() != LoopVectorizeHints::FK_Enabled) {
			
 
				+      DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
			
 
				+      emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F,
			
 
				+                                     L->getStartLoc(), emitRemark());
			
 
				+      return false;
			
 
				+    }
			
 
				 
			
 
				-  /// Returns true if this instruction will remain scalar after vectorization.
			
 
				-  bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); }
			
 
				+    if (getWidth() == 1 && getInterleave() == 1) {
			
 
				+      // FIXME: Add a separate metadata to indicate when the loop has already
			
 
				+      // been vectorized instead of setting width and count to 1.
			
 
				+      DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
			
 
				+      // FIXME: Add interleave.disable metadata. This will allow
			
 
				+      // vectorize.disable to be used without disabling the pass and errors
			
 
				+      // to differentiate between disabled vectorization and a width of 1.
			
 
				+      emitOptimizationRemarkAnalysis(
			
 
				+          F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
			
 
				+          "loop not vectorized: vectorization and interleaving are explicitly "
			
 
				+          "disabled, or vectorize width and interleave count are both set to "
			
 
				+          "1");
			
 
				+      return false;
			
 
				+    }
			
 
				 
			
 
				-  /// Returns the information that we collected about runtime memory check.
			
 
				-  const RuntimePointerChecking *getRuntimePointerChecking() const {
			
 
				-    return LAI->getRuntimePointerChecking();
			
 
				+    return true;
			
 
				   }
			
 
				 
			
 
				-  const LoopAccessInfo *getLAI() const {
			
 
				-    return LAI;
			
 
				-  }
			
 
				+  /// Dumps all the hint information.
			
 
				+  std::string emitRemark() const {
			
 
				+    VectorizationReport R;
			
 
				+    if (Force.Value == LoopVectorizeHints::FK_Disabled)
			
 
				+      R << "vectorization is explicitly disabled";
			
 
				+    else {
			
 
				+      R << "use -Rpass-analysis=loop-vectorize for more info";
			
 
				+      if (Force.Value == LoopVectorizeHints::FK_Enabled) {
			
 
				+        R << " (Force=true";
			
 
				+        if (Width.Value != 0)
			
 
				+          R << ", Vector Width=" << Width.Value;
			
 
				+        if (Interleave.Value != 0)
			
 
				+          R << ", Interleave Count=" << Interleave.Value;
			
 
				+        R << ")";
			
 
				+      }
			
 
				+    }
			
 
				 
			
 
				-  /// \brief Check if \p Instr belongs to any interleaved access group.
			
 
				-  bool isAccessInterleaved(Instruction *Instr) {
			
 
				-    return InterleaveInfo.isInterleaved(Instr);
			
 
				+    return R.str();
			
 
				   }
			
 
				 
			
 
				-  /// \brief Get the interleaved access group that \p Instr belongs to.
			
 
				-  const InterleaveGroup *getInterleavedAccessGroup(Instruction *Instr) {
			
 
				-    return InterleaveInfo.getInterleaveGroup(Instr);
			
 
				-  }
			
 
				+  unsigned getWidth() const { return Width.Value; }
			
 
				+  unsigned getInterleave() const { return Interleave.Value; }
			
 
				+  enum ForceKind getForce() const { return (ForceKind)Force.Value; }
			
 
				 
			
 
				-  unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
			
 
				+private:
			
 
				+  /// Find hints specified in the loop metadata and update local values.
			
 
				+  void getHintsFromMetadata() {
			
 
				+    MDNode *LoopID = TheLoop->getLoopID();
			
 
				+    if (!LoopID)
			
 
				+      return;
			
 
				 
			
 
				-  bool hasStride(Value *V) { return StrideSet.count(V); }
			
 
				-  bool mustCheckStrides() { return !StrideSet.empty(); }
			
 
				-  SmallPtrSet<Value *, 8>::iterator strides_begin() {
			
 
				-    return StrideSet.begin();
			
 
				-  }
			
 
				-  SmallPtrSet<Value *, 8>::iterator strides_end() { return StrideSet.end(); }
			
 
				+    // First operand should refer to the loop id itself.
			
 
				+    assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
			
 
				+    assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
			
 
				 
			
 
				-  /// Returns true if the target machine supports masked store operation
			
 
				-  /// for the given \p DataType and kind of access to \p Ptr.
			
 
				-  bool isLegalMaskedStore(Type *DataType, Value *Ptr) {
			
 
				-    return TTI->isLegalMaskedStore(DataType, isConsecutivePtr(Ptr));
			
 
				-  }
			
 
				-  /// Returns true if the target machine supports masked load operation
			
 
				-  /// for the given \p DataType and kind of access to \p Ptr.
			
 
				-  bool isLegalMaskedLoad(Type *DataType, Value *Ptr) {
			
 
				-    return TTI->isLegalMaskedLoad(DataType, isConsecutivePtr(Ptr));
			
 
				-  }
			
 
				-  /// Returns true if vector representation of the instruction \p I
			
 
				-  /// requires mask.
			
 
				-  bool isMaskRequired(const Instruction* I) {
			
 
				-    return (MaskedOp.count(I) != 0);
			
 
				-  }
			
 
				-  unsigned getNumStores() const {
			
 
				-    return LAI->getNumStores();
			
 
				-  }
			
 
				-  unsigned getNumLoads() const {
			
 
				-    return LAI->getNumLoads();
			
 
				-  }
			
 
				-  unsigned getNumPredStores() const {
			
 
				-    return NumPredStores;
			
 
				-  }
			
 
				-private:
			
 
				-  /// Check if a single basic block loop is vectorizable.
			
 
				-  /// At this point we know that this is a loop with a constant trip count
			
 
				-  /// and we only need to check individual instructions.
			
 
				-  bool canVectorizeInstrs();
			
 
				+    for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
			
 
				+      const MDString *S = nullptr;
			
 
				+      SmallVector<Metadata *, 4> Args;
			
 
				 
			
 
				-  /// When we vectorize loops we may change the order in which
			
 
				-  /// we read and write from memory. This method checks if it is
			
 
				-  /// legal to vectorize the code, considering only memory constrains.
			
 
				-  /// Returns true if the loop is vectorizable
			
 
				-  bool canVectorizeMemory();
			
 
				+      // The expected hint is either a MDString or a MDNode with the first
			
 
				+      // operand a MDString.
			
 
				+      if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
			
 
				+        if (!MD || MD->getNumOperands() == 0)
			
 
				+          continue;
			
 
				+        S = dyn_cast<MDString>(MD->getOperand(0));
			
 
				+        for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
			
 
				+          Args.push_back(MD->getOperand(i));
			
 
				+      } else {
			
 
				+        S = dyn_cast<MDString>(LoopID->getOperand(i));
			
 
				+        assert(Args.size() == 0 && "too many arguments for MDString");
			
 
				+      }
			
 
				 
			
 
				-  /// Return true if we can vectorize this loop using the IF-conversion
			
 
				-  /// transformation.
			
 
				-  bool canVectorizeWithIfConvert();
			
 
				+      if (!S)
			
 
				+        continue;
			
 
				 
			
 
				-  /// Collect the variables that need to stay uniform after vectorization.
			
 
				-  void collectLoopUniforms();
			
 
				+      // Check if the hint starts with the loop metadata prefix.
			
 
				+      StringRef Name = S->getString();
			
 
				+      if (Args.size() == 1)
			
 
				+        setHint(Name, Args[0]);
			
 
				+    }
			
 
				+  }
			
 
				 
			
 
				-  /// Return true if all of the instructions in the block can be speculatively
			
 
				-  /// executed. \p SafePtrs is a list of addresses that are known to be legal
			
 
				-  /// and we know that we can read from them without segfault.
			
 
				-  bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs);
			
 
				+  /// Checks string hint with one operand and set value if valid.
			
 
				+  void setHint(StringRef Name, Metadata *Arg) {
			
 
				+    if (!Name.startswith(Prefix()))
			
 
				+      return;
			
 
				+    Name = Name.substr(Prefix().size(), StringRef::npos);
			
 
				 
			
 
				-  /// Returns the induction kind of Phi and record the step. This function may
			
 
				-  /// return NoInduction if the PHI is not an induction variable.
			
 
				-  InductionKind isInductionVariable(PHINode *Phi, ConstantInt *&StepValue);
			
 
				+    const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
			
 
				+    if (!C) return;
			
 
				+    unsigned Val = C->getZExtValue();
			
 
				 
			
 
				-  /// \brief Collect memory access with loop invariant strides.
			
 
				-  ///
			
 
				-  /// Looks for accesses like "a[i * StrideA]" where "StrideA" is loop
			
 
				-  /// invariant.
			
 
				-  void collectStridedAccess(Value *LoadOrStoreInst);
			
 
				+    Hint *Hints[] = {&Width, &Interleave, &Force};
			
 
				+    for (auto H : Hints) {
			
 
				+      if (Name == H->Name) {
			
 
				+        if (H->validate(Val))
			
 
				+          H->Value = Val;
			
 
				+        else
			
 
				+          DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
			
 
				+        break;
			
 
				+      }
			
 
				+    }
			
 
				+  }
			
 
				 
			
 
				-  /// Report an analysis message to assist the user in diagnosing loops that are
			
 
				-  /// not vectorized.  These are handled as LoopAccessReport rather than
			
 
				-  /// VectorizationReport because the << operator of VectorizationReport returns
			
 
				-  /// LoopAccessReport.
			
 
				-  void emitAnalysis(const LoopAccessReport &Message) {
			
 
				-    LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME);
			
 
				+  /// Create a new hint from name / value pair.
			
 
				+  MDNode *createHintMetadata(StringRef Name, unsigned V) const {
			
 
				+    LLVMContext &Context = TheLoop->getHeader()->getContext();
			
 
				+    Metadata *MDs[] = {MDString::get(Context, Name),
			
 
				+                       ConstantAsMetadata::get(
			
 
				+                           ConstantInt::get(Type::getInt32Ty(Context), V))};
			
 
				+    return MDNode::get(Context, MDs);
			
 
				   }
			
 
				 
			
 
				-  unsigned NumPredStores;
			
 
				+  /// Matches metadata with hint name.
			
 
				+  bool matchesHintMetadataName(MDNode *Node, ArrayRef<Hint> HintTypes) {
			
 
				+    MDString* Name = dyn_cast<MDString>(Node->getOperand(0));
			
 
				+    if (!Name)
			
 
				+      return false;
			
 
				 
			
 
				-  /// The loop that we evaluate.
			
 
				-  Loop *TheLoop;
			
 
				-  /// Scev analysis.
			
 
				-  ScalarEvolution *SE;
			
 
				-  /// Target Library Info.
			
 
				-  TargetLibraryInfo *TLI;
			
 
				-  /// Parent function
			
 
				-  Function *TheFunction;
			
 
				-  /// Target Transform Info
			
 
				-  const TargetTransformInfo *TTI;
			
 
				-  /// Dominator Tree.
			
 
				-  DominatorTree *DT;
			
 
				-  // LoopAccess analysis.
			
 
				-  LoopAccessAnalysis *LAA;
			
 
				-  // And the loop-accesses info corresponding to this loop.  This pointer is
			
 
				-  // null until canVectorizeMemory sets it up.
			
 
				-  const LoopAccessInfo *LAI;
			
 
				+    for (auto H : HintTypes)
			
 
				+      if (Name->getString().endswith(H.Name))
			
 
				+        return true;
			
 
				+    return false;
			
 
				+  }
			
 
				 
			
 
				-  /// The interleave access information contains groups of interleaved accesses
			
 
				-  /// with the same stride and close to each other.
			
 
				-  InterleavedAccessInfo InterleaveInfo;
			
 
				+  /// Sets current hints into loop metadata, keeping other values intact.
			
 
				+  void writeHintsToMetadata(ArrayRef<Hint> HintTypes) {
			
 
				+    if (HintTypes.size() == 0)
			
 
				+      return;
			
 
				 
			
 
				-  //  ---  vectorization state --- //
			
 
				+    // Reserve the first element to LoopID (see below).
			
 
				+    SmallVector<Metadata *, 4> MDs(1);
			
 
				+    // If the loop already has metadata, then ignore the existing operands.
			
 
				+    MDNode *LoopID = TheLoop->getLoopID();
			
 
				+    if (LoopID) {
			
 
				+      for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
			
 
				+        MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
			
 
				+        // If node in update list, ignore old value.
			
 
				+        if (!matchesHintMetadataName(Node, HintTypes))
			
 
				+          MDs.push_back(Node);
			
 
				+      }
			
 
				+    }
			
 
				 
			
 
				-  /// Holds the integer induction variable. This is the counter of the
			
 
				-  /// loop.
			
 
				-  PHINode *Induction;
			
 
				-  /// Holds the reduction variables.
			
 
				-  ReductionList Reductions;
			
 
				-  /// Holds all of the induction variables that we found in the loop.
			
 
				-  /// Notice that inductions don't need to start at zero and that induction
			
 
				-  /// variables can be pointers.
			
 
				-  InductionList Inductions;
			
 
				-  /// Holds the widest induction type encountered.
			
 
				-  Type *WidestIndTy;
			
 
				+    // Now, add the missing hints.
			
 
				+    for (auto H : HintTypes)
			
 
				+      MDs.push_back(createHintMetadata(Twine(Prefix(), H.Name).str(), H.Value));
			
 
				 
			
 
				-  /// Allowed outside users. This holds the reduction
			
 
				-  /// vars which can be accessed from outside the loop.
			
 
				-  SmallPtrSet<Value*, 4> AllowedExit;
			
 
				-  /// This set holds the variables which are known to be uniform after
			
 
				-  /// vectorization.
			
 
				-  SmallPtrSet<Instruction*, 4> Uniforms;
			
 
				+    // Replace current metadata node with new one.
			
 
				+    LLVMContext &Context = TheLoop->getHeader()->getContext();
			
 
				+    MDNode *NewLoopID = MDNode::get(Context, MDs);
			
 
				+    // Set operand 0 to refer to the loop id itself.
			
 
				+    NewLoopID->replaceOperandWith(0, NewLoopID);
			
 
				 
			
 
				-  /// Can we assume the absence of NaNs.
			
 
				-  bool HasFunNoNaNAttr;
			
 
				+    TheLoop->setLoopID(NewLoopID);
			
 
				+  }
			
 
				 
			
 
				-  /// Vectorization requirements that will go through late-evaluation.
			
 
				-  LoopVectorizationRequirements *Requirements;
			
 
				+  /// The loop these hints belong to.
			
 
				+  const Loop *TheLoop;
			
 
				+};
			
 
				 
			
 
				-  ValueToValueMap Strides;
			
 
				-  SmallPtrSet<Value *, 8> StrideSet;
			
 
				+static void emitMissedWarning(Function *F, Loop *L,
			
 
				+                              const LoopVectorizeHints &LH) {
			
 
				+  emitOptimizationRemarkMissed(F->getContext(), DEBUG_TYPE, *F,
			
 
				+                               L->getStartLoc(), LH.emitRemark());
			
 
				 
			
 
				-  /// While vectorizing these instructions we have to generate a
			
 
				-  /// call to the appropriate masked intrinsic
			
 
				-  SmallPtrSet<const Instruction*, 8> MaskedOp;
			
 
				-};
			
 
				+  if (LH.getForce() == LoopVectorizeHints::FK_Enabled) {
			
 
				+    if (LH.getWidth() != 1)
			
 
				+      emitLoopVectorizeWarning(
			
 
				+          F->getContext(), *F, L->getStartLoc(),
			
 
				+          "failed explicitly specified loop vectorization");
			
 
				+    else if (LH.getInterleave() != 1)
			
 
				+      emitLoopInterleaveWarning(
			
 
				+          F->getContext(), *F, L->getStartLoc(),
			
 
				+          "failed explicitly specified loop interleaving");
			
 
				+  }
			
 
				+}
			
 
				 
			
 
				-/// LoopVectorizationCostModel - estimates the expected speedups due to
			
 
				-/// vectorization.
			
 
				-/// In many cases vectorization is not profitable. This can happen because of
			
 
				-/// a number of reasons. In this class we mainly attempt to predict the
			
 
				-/// expected speedup/slowdowns due to the supported instruction set. We use the
			
 
				-/// TargetTransformInfo to query the different backends for the cost of
			
 
				-/// different operations.
			
 
				-class LoopVectorizationCostModel {
			
 
				+/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
			
 
				+/// to what vectorization factor.
			
 
				+/// This class does not look at the profitability of vectorization, only the
			
 
				+/// legality. This class has two main kinds of checks:
			
 
				+/// * Memory checks - The code in canVectorizeMemory checks if vectorization
			
 
				+///   will change the order of memory accesses in a way that will change the
			
 
				+///   correctness of the program.
			
 
				+/// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
			
 
				+/// checks for a number of different conditions, such as the availability of a
			
 
				+/// single induction variable, that all types are supported and vectorize-able,
			
 
				+/// etc. This code reflects the capabilities of InnerLoopVectorizer.
			
 
				+/// This class is also used by InnerLoopVectorizer for identifying
			
 
				+/// induction variable and the different reduction variables.
			
 
				+class LoopVectorizationLegality {
			
 
				 public:
			
 
				-  LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
			
 
				-                             LoopVectorizationLegality *Legal,
			
 
				-                             const TargetTransformInfo &TTI,
			
 
				-                             const TargetLibraryInfo *TLI, AssumptionCache *AC,
			
 
				-                             const Function *F, const LoopVectorizeHints *Hints)
			
 
				-      : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI),
			
 
				-        TheFunction(F), Hints(Hints) {
			
 
				-    CodeMetrics::collectEphemeralValues(L, AC, EphValues);
			
 
				-  }
			
 
				+  LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
			
 
				+                            TargetLibraryInfo *TLI, AliasAnalysis *AA,
			
 
				+                            Function *F, const TargetTransformInfo *TTI,
			
 
				+                            LoopAccessAnalysis *LAA,
			
 
				+                            LoopVectorizationRequirements *R)
			
 
				+      : NumPredStores(0), TheLoop(L), SE(SE), TLI(TLI), TheFunction(F),
			
 
				+        TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), InterleaveInfo(SE, L, DT),
			
 
				+        Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false),
			
 
				+        Requirements(R) {}
			
 
				 
			
 
				-  /// Information about vectorization costs
			
 
				-  struct VectorizationFactor {
			
 
				-    unsigned Width; // Vector width with best cost
			
 
				-    unsigned Cost; // Cost of the loop with that width
			
 
				+  /// This enum represents the kinds of inductions that we support.
			
 
				+  enum InductionKind {
			
 
				+    IK_NoInduction,  ///< Not an induction variable.
			
 
				+    IK_IntInduction, ///< Integer induction variable. Step = C.
			
 
				+    IK_PtrInduction  ///< Pointer induction var. Step = C / sizeof(elem).
			
 
				   };
			
 
				-  /// \return The most profitable vectorization factor and the cost of that VF.
			
 
				-  /// This method checks every power of two up to VF. If UserVF is not ZERO
			
 
				-  /// then this vectorization factor will be selected if vectorization is
			
 
				-  /// possible.
			
 
				-  VectorizationFactor selectVectorizationFactor(bool OptForSize);
			
 
				-
			
 
				-  /// \return The size (in bits) of the widest type in the code that
			
 
				-  /// needs to be vectorized. We ignore values that remain scalar such as
			
 
				-  /// 64 bit loop indices.
			
 
				-  unsigned getWidestType();
			
 
				 
			
 
				-  /// \return The desired interleave count.
			
 
				-  /// If interleave count has been specified by metadata it will be returned.
			
 
				-  /// Otherwise, the interleave count is computed and returned. VF and LoopCost
			
 
				-  /// are the selected vectorization factor and the cost of the selected VF.
			
 
				-  unsigned selectInterleaveCount(bool OptForSize, unsigned VF,
			
 
				-                                 unsigned LoopCost);
			
 
				+  /// A struct for saving information about induction variables.
			
 
				+  struct InductionInfo {
			
 
				+    InductionInfo(Value *Start, InductionKind K, ConstantInt *Step)
			
 
				+        : StartValue(Start), IK(K), StepValue(Step) {
			
 
				+      assert(IK != IK_NoInduction && "Not an induction");
			
 
				+      assert(StartValue && "StartValue is null");
			
 
				+      assert(StepValue && !StepValue->isZero() && "StepValue is zero");
			
 
				+      assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
			
 
				+             "StartValue is not a pointer for pointer induction");
			
 
				+      assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
			
 
				+             "StartValue is not an integer for integer induction");
			
 
				+      assert(StepValue->getType()->isIntegerTy() &&
			
 
				+             "StepValue is not an integer");
			
 
				+    }
			
 
				+    InductionInfo()
			
 
				+        : StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {}
			
 
				 
			
 
				-  /// \return The most profitable unroll factor.
			
 
				-  /// This method finds the best unroll-factor based on register pressure and
			
 
				-  /// other parameters. VF and LoopCost are the selected vectorization factor
			
 
				-  /// and the cost of the selected VF.
			
 
				-  unsigned computeInterleaveCount(bool OptForSize, unsigned VF,
			
 
				-                                  unsigned LoopCost);
			
 
				+    /// Get the consecutive direction. Returns:
			
 
				+    ///   0 - unknown or non-consecutive.
			
 
				+    ///   1 - consecutive and increasing.
			
 
				+    ///  -1 - consecutive and decreasing.
			
 
				+    int getConsecutiveDirection() const {
			
 
				+      if (StepValue && (StepValue->isOne() || StepValue->isMinusOne()))
			
 
				+        return StepValue->getSExtValue();
			
 
				+      return 0;
			
 
				+    }
			
 
				 
			
 
				-  /// \brief A struct that represents some properties of the register usage
			
 
				-  /// of a loop.
			
 
				-  struct RegisterUsage {
			
 
				-    /// Holds the number of loop invariant values that are used in the loop.
			
 
				-    unsigned LoopInvariantRegs;
			
 
				-    /// Holds the maximum number of concurrent live intervals in the loop.
			
 
				-    unsigned MaxLocalUsers;
			
 
				-    /// Holds the number of instructions in the loop.
			
 
				-    unsigned NumInstructions;
			
 
				-  };
			
 
				+    /// Compute the transformed value of Index at offset StartValue using step
			
 
				+    /// StepValue.
			
 
				+    /// For integer induction, returns StartValue + Index * StepValue.
			
 
				+    /// For pointer induction, returns StartValue[Index * StepValue].
			
 
				+    /// FIXME: The newly created binary instructions should contain nsw/nuw
			
 
				+    /// flags, which can be found from the original scalar operations.
			
 
				+    Value *transform(IRBuilder<> &B, Value *Index) const {
			
 
				+      switch (IK) {
			
 
				+      case IK_IntInduction:
			
 
				+        assert(Index->getType() == StartValue->getType() &&
			
 
				+               "Index type does not match StartValue type");
			
 
				+        if (StepValue->isMinusOne())
			
 
				+          return B.CreateSub(StartValue, Index);
			
 
				+        if (!StepValue->isOne())
			
 
				+          Index = B.CreateMul(Index, StepValue);
			
 
				+        return B.CreateAdd(StartValue, Index);
			
 
				 
			
 
				-  /// \return  information about the register usage of the loop.
			
 
				-  RegisterUsage calculateRegisterUsage();
			
 
				+      case IK_PtrInduction:
			
 
				+        assert(Index->getType() == StepValue->getType() &&
			
 
				+               "Index type does not match StepValue type");
			
 
				+        if (StepValue->isMinusOne())
			
 
				+          Index = B.CreateNeg(Index);
			
 
				+        else if (!StepValue->isOne())
			
 
				+          Index = B.CreateMul(Index, StepValue);
			
 
				+        return B.CreateGEP(nullptr, StartValue, Index);
			
 
				 
			
 
				-private:
			
 
				-  /// Returns the expected execution cost. The unit of the cost does
			
 
				-  /// not matter because we use the 'cost' units to compare different
			
 
				-  /// vector widths. The cost that is returned is *not* normalized by
			
 
				-  /// the factor width.
			
 
				-  unsigned expectedCost(unsigned VF);
			
 
				+      case IK_NoInduction:
			
 
				+        return nullptr;
			
 
				+      }
			
 
				+      llvm_unreachable("invalid enum");
			
 
				+    }
			
 
				 
			
 
				-  /// Returns the execution time cost of an instruction for a given vector
			
 
				-  /// width. Vector width of one means scalar.
			
 
				-  unsigned getInstructionCost(Instruction *I, unsigned VF);
			
 
				+    /// Start value.
			
 
				+    TrackingVH<Value> StartValue;
			
 
				+    /// Induction kind.
			
 
				+    InductionKind IK;
			
 
				+    /// Step value.
			
 
				+    ConstantInt *StepValue;
			
 
				+  };
			
 
				 
			
 
				-  /// Returns whether the instruction is a load or store and will be a emitted
			
 
				-  /// as a vector operation.
			
 
				-  bool isConsecutiveLoadOrStore(Instruction *I);
			
 
				+  /// ReductionList contains the reduction descriptors for all
			
 
				+  /// of the reductions that were found in the loop.
			
 
				+  typedef DenseMap<PHINode *, RecurrenceDescriptor> ReductionList;
			
 
				 
			
 
				-  /// Report an analysis message to assist the user in diagnosing loops that are
			
 
				-  /// not vectorized.  These are handled as LoopAccessReport rather than
			
 
				-  /// VectorizationReport because the << operator of VectorizationReport returns
			
 
				-  /// LoopAccessReport.
			
 
				-  void emitAnalysis(const LoopAccessReport &Message) {
			
 
				-    LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME);
			
 
				-  }
			
 
				+  /// InductionList saves induction variables and maps them to the
			
 
				+  /// induction descriptor.
			
 
				+  typedef MapVector<PHINode*, InductionInfo> InductionList;
			
 
				 
			
 
				-  /// Values used only by @llvm.assume calls.
			
 
				-  SmallPtrSet<const Value *, 32> EphValues;
			
 
				+  /// Returns true if it is legal to vectorize this loop.
			
 
				+  /// This does not mean that it is profitable to vectorize this
			
 
				+  /// loop, only that it is legal to do so.
			
 
				+  bool canVectorize();
			
 
				 
			
 
				-  /// The loop that we evaluate.
			
 
				-  Loop *TheLoop;
			
 
				-  /// Scev analysis.
			
 
				-  ScalarEvolution *SE;
			
 
				-  /// Loop Info analysis.
			
 
				-  LoopInfo *LI;
			
 
				-  /// Vectorization legality.
			
 
				-  LoopVectorizationLegality *Legal;
			
 
				-  /// Vector target information.
			
 
				-  const TargetTransformInfo &TTI;
			
 
				-  /// Target Library Info.
			
 
				-  const TargetLibraryInfo *TLI;
			
 
				-  const Function *TheFunction;
			
 
				-  // Loop Vectorize Hint.
			
 
				-  const LoopVectorizeHints *Hints;
			
 
				-};
			
 
				+  /// Returns the Induction variable.
			
 
				+  PHINode *getInduction() { return Induction; }
			
 
				 
			
 
				-/// Utility class for getting and setting loop vectorizer hints in the form
			
 
				-/// of loop metadata.
			
 
				-/// This class keeps a number of loop annotations locally (as member variables)
			
 
				-/// and can, upon request, write them back as metadata on the loop. It will
			
 
				-/// initially scan the loop for existing metadata, and will update the local
			
 
				-/// values based on information in the loop.
			
 
				-/// We cannot write all values to metadata, as the mere presence of some info,
			
 
				-/// for example 'force', means a decision has been made. So, we need to be
			
 
				-/// careful NOT to add them if the user hasn't specifically asked so.
			
 
				-class LoopVectorizeHints {
			
 
				-  enum HintKind {
			
 
				-    HK_WIDTH,
			
 
				-    HK_UNROLL,
			
 
				-    HK_FORCE
			
 
				-  };
			
 
				+  /// Returns the reduction variables found in the loop.
			
 
				+  ReductionList *getReductionVars() { return &Reductions; }
			
 
				 
			
 
				-  /// Hint - associates name and validation with the hint value.
			
 
				-  struct Hint {
			
 
				-    const char * Name;
			
 
				-    unsigned Value; // This may have to change for non-numeric values.
			
 
				-    HintKind Kind;
			
 
				+  /// Returns the induction variables found in the loop.
			
 
				+  InductionList *getInductionVars() { return &Inductions; }
			
 
				 
			
 
				-    Hint(const char * Name, unsigned Value, HintKind Kind)
			
 
				-      : Name(Name), Value(Value), Kind(Kind) { }
			
 
				+  /// Returns the widest induction type.
			
 
				+  Type *getWidestInductionType() { return WidestIndTy; }
			
 
				 
			
 
				-    bool validate(unsigned Val) {
			
 
				-      switch (Kind) {
			
 
				-      case HK_WIDTH:
			
 
				-        return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
			
 
				-      case HK_UNROLL:
			
 
				-        return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
			
 
				-      case HK_FORCE:
			
 
				-        return (Val <= 1);
			
 
				-      }
			
 
				-      return false;
			
 
				-    }
			
 
				-  };
			
 
				+  /// Returns True if V is an induction variable in this loop.
			
 
				+  bool isInductionVariable(const Value *V);
			
 
				 
			
 
				-  /// Vectorization width.
			
 
				-  Hint Width;
			
 
				-  /// Vectorization interleave factor.
			
 
				-  Hint Interleave;
			
 
				-  /// Vectorization forced
			
 
				-  Hint Force;
			
 
				+  /// Return true if the block BB needs to be predicated in order for the loop
			
 
				+  /// to be vectorized.
			
 
				+  bool blockNeedsPredication(BasicBlock *BB);
			
 
				 
			
 
				-  /// Return the loop metadata prefix.
			
 
				-  static StringRef Prefix() { return "llvm.loop."; }
			
 
				+  /// Check if this  pointer is consecutive when vectorizing. This happens
			
 
				+  /// when the last index of the GEP is the induction variable, or that the
			
 
				+  /// pointer itself is an induction variable.
			
 
				+  /// This check allows us to vectorize A[idx] into a wide load/store.
			
 
				+  /// Returns:
			
 
				+  /// 0 - Stride is unknown or non-consecutive.
			
 
				+  /// 1 - Address is consecutive.
			
 
				+  /// -1 - Address is consecutive, and decreasing.
			
 
				+  int isConsecutivePtr(Value *Ptr);
			
 
				 
			
 
				-public:
			
 
				-  enum ForceKind {
			
 
				-    FK_Undefined = -1, ///< Not selected.
			
 
				-    FK_Disabled = 0,   ///< Forcing disabled.
			
 
				-    FK_Enabled = 1,    ///< Forcing enabled.
			
 
				-  };
			
 
				+  /// Returns true if the value V is uniform within the loop.
			
 
				+  bool isUniform(Value *V);
			
 
				 
			
 
				-  LoopVectorizeHints(const Loop *L, bool DisableInterleaving)
			
 
				-      : Width("vectorize.width", VectorizerParams::VectorizationFactor,
			
 
				-              HK_WIDTH),
			
 
				-        Interleave("interleave.count", DisableInterleaving, HK_UNROLL),
			
 
				-        Force("vectorize.enable", FK_Undefined, HK_FORCE),
			
 
				-        TheLoop(L) {
			
 
				-    // Populate values with existing loop metadata.
			
 
				-    getHintsFromMetadata();
			
 
				+  /// Returns true if this instruction will remain scalar after vectorization.
			
 
				+  bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); }
			
 
				 
			
 
				-    // force-vector-interleave overrides DisableInterleaving.
			
 
				-    if (VectorizerParams::isInterleaveForced())
			
 
				-      Interleave.Value = VectorizerParams::VectorizationInterleave;
			
 
				+  /// Returns the information that we collected about runtime memory check.
			
 
				+  const RuntimePointerChecking *getRuntimePointerChecking() const {
			
 
				+    return LAI->getRuntimePointerChecking();
			
 
				+  }
			
 
				 
			
 
				-    DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs()
			
 
				-          << "LV: Interleaving disabled by the pass manager\n");
			
 
				+  const LoopAccessInfo *getLAI() const {
			
 
				+    return LAI;
			
 
				   }
			
 
				 
			
 
				-  /// Mark the loop L as already vectorized by setting the width to 1.
			
 
				-  void setAlreadyVectorized() {
			
 
				-    Width.Value = Interleave.Value = 1;
			
 
				-    Hint Hints[] = {Width, Interleave};
			
 
				-    writeHintsToMetadata(Hints);
			
 
				+  /// \brief Check if \p Instr belongs to any interleaved access group.
			
 
				+  bool isAccessInterleaved(Instruction *Instr) {
			
 
				+    return InterleaveInfo.isInterleaved(Instr);
			
 
				   }
			
 
				 
			
 
				-  bool allowVectorization(Function *F, Loop *L, bool AlwaysVectorize) const {
			
 
				-    if (getForce() == LoopVectorizeHints::FK_Disabled) {
			
 
				-      DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
			
 
				-      emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F,
			
 
				-                                     L->getStartLoc(), emitRemark());
			
 
				-      return false;
			
 
				-    }
			
 
				+  /// \brief Get the interleaved access group that \p Instr belongs to.
			
 
				+  const InterleaveGroup *getInterleavedAccessGroup(Instruction *Instr) {
			
 
				+    return InterleaveInfo.getInterleaveGroup(Instr);
			
 
				+  }
			
 
				 
			
 
				-    if (!AlwaysVectorize && getForce() != LoopVectorizeHints::FK_Enabled) {
			
 
				-      DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
			
 
				-      emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F,
			
 
				-                                     L->getStartLoc(), emitRemark());
			
 
				-      return false;
			
 
				-    }
			
 
				+  unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
			
 
				 
			
 
				-    if (getWidth() == 1 && getInterleave() == 1) {
			
 
				-      // FIXME: Add a separate metadata to indicate when the loop has already
			
 
				-      // been vectorized instead of setting width and count to 1.
			
 
				-      DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
			
 
				-      // FIXME: Add interleave.disable metadata. This will allow
			
 
				-      // vectorize.disable to be used without disabling the pass and errors
			
 
				-      // to differentiate between disabled vectorization and a width of 1.
			
 
				-      emitOptimizationRemarkAnalysis(
			
 
				-          F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
			
 
				-          "loop not vectorized: vectorization and interleaving are explicitly "
			
 
				-          "disabled, or vectorize width and interleave count are both set to "
			
 
				-          "1");
			
 
				-      return false;
			
 
				-    }
			
 
				+  bool hasStride(Value *V) { return StrideSet.count(V); }
			
 
				+  bool mustCheckStrides() { return !StrideSet.empty(); }
			
 
				+  SmallPtrSet<Value *, 8>::iterator strides_begin() {
			
 
				+    return StrideSet.begin();
			
 
				+  }
			
 
				+  SmallPtrSet<Value *, 8>::iterator strides_end() { return StrideSet.end(); }
			
 
				 
			
 
				-    return true;
			
 
				+  /// Returns true if the target machine supports masked store operation
			
 
				+  /// for the given \p DataType and kind of access to \p Ptr.
			
 
				+  bool isLegalMaskedStore(Type *DataType, Value *Ptr) {
			
 
				+    return TTI->isLegalMaskedStore(DataType, isConsecutivePtr(Ptr));
			
 
				+  }
			
 
				+  /// Returns true if the target machine supports masked load operation
			
 
				+  /// for the given \p DataType and kind of access to \p Ptr.
			
 
				+  bool isLegalMaskedLoad(Type *DataType, Value *Ptr) {
			
 
				+    return TTI->isLegalMaskedLoad(DataType, isConsecutivePtr(Ptr));
			
 
				+  }
			
 
				+  /// Returns true if vector representation of the instruction \p I
			
 
				+  /// requires mask.
			
 
				+  bool isMaskRequired(const Instruction* I) {
			
 
				+    return (MaskedOp.count(I) != 0);
			
 
				   }
			
 
				+  unsigned getNumStores() const {
			
 
				+    return LAI->getNumStores();
			
 
				+  }
			
 
				+  unsigned getNumLoads() const {
			
 
				+    return LAI->getNumLoads();
			
 
				+  }
			
 
				+  unsigned getNumPredStores() const {
			
 
				+    return NumPredStores;
			
 
				+  }
			
 
				+private:
			
 
				+  /// Check if a single basic block loop is vectorizable.
			
 
				+  /// At this point we know that this is a loop with a constant trip count
			
 
				+  /// and we only need to check individual instructions.
			
 
				+  bool canVectorizeInstrs();
			
 
				 
			
 
				-  /// Dumps all the hint information.
			
 
				-  std::string emitRemark() const {
			
 
				-    VectorizationReport R;
			
 
				-    if (Force.Value == LoopVectorizeHints::FK_Disabled)
			
 
				-      R << "vectorization is explicitly disabled";
			
 
				-    else {
			
 
				-      R << "use -Rpass-analysis=loop-vectorize for more info";
			
 
				-      if (Force.Value == LoopVectorizeHints::FK_Enabled) {
			
 
				-        R << " (Force=true";
			
 
				-        if (Width.Value != 0)
			
 
				-          R << ", Vector Width=" << Width.Value;
			
 
				-        if (Interleave.Value != 0)
			
 
				-          R << ", Interleave Count=" << Interleave.Value;
			
 
				-        R << ")";
			
 
				-      }
			
 
				-    }
			
 
				+  /// When we vectorize loops we may change the order in which
			
 
				+  /// we read and write from memory. This method checks if it is
			
 
				+  /// legal to vectorize the code, considering only memory constrains.
			
 
				+  /// Returns true if the loop is vectorizable
			
 
				+  bool canVectorizeMemory();
			
 
				 
			
 
				-    return R.str();
			
 
				+  /// Return true if we can vectorize this loop using the IF-conversion
			
 
				+  /// transformation.
			
 
				+  bool canVectorizeWithIfConvert();
			
 
				+
			
 
				+  /// Collect the variables that need to stay uniform after vectorization.
			
 
				+  void collectLoopUniforms();
			
 
				+
			
 
				+  /// Return true if all of the instructions in the block can be speculatively
			
 
				+  /// executed. \p SafePtrs is a list of addresses that are known to be legal
			
 
				+  /// and we know that we can read from them without segfault.
			
 
				+  bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs);
			
 
				+
			
 
				+  /// Returns the induction kind of Phi and record the step. This function may
			
 
				+  /// return NoInduction if the PHI is not an induction variable.
			
 
				+  InductionKind isInductionVariable(PHINode *Phi, ConstantInt *&StepValue);
			
 
				+
			
 
				+  /// \brief Collect memory access with loop invariant strides.
			
 
				+  ///
			
 
				+  /// Looks for accesses like "a[i * StrideA]" where "StrideA" is loop
			
 
				+  /// invariant.
			
 
				+  void collectStridedAccess(Value *LoadOrStoreInst);
			
 
				+
			
 
				+  /// Report an analysis message to assist the user in diagnosing loops that are
			
 
				+  /// not vectorized.  These are handled as LoopAccessReport rather than
			
 
				+  /// VectorizationReport because the << operator of VectorizationReport returns
			
 
				+  /// LoopAccessReport.
			
 
				+  void emitAnalysis(const LoopAccessReport &Message) {
			
 
				+    LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME);
			
 
				   }
			
 
				 
			
 
				-  unsigned getWidth() const { return Width.Value; }
			
 
				-  unsigned getInterleave() const { return Interleave.Value; }
			
 
				-  enum ForceKind getForce() const { return (ForceKind)Force.Value; }
			
 
				+  unsigned NumPredStores;
			
 
				 
			
 
				-private:
			
 
				-  /// Find hints specified in the loop metadata and update local values.
			
 
				-  void getHintsFromMetadata() {
			
 
				-    MDNode *LoopID = TheLoop->getLoopID();
			
 
				-    if (!LoopID)
			
 
				-      return;
			
 
				+  /// The loop that we evaluate.
			
 
				+  Loop *TheLoop;
			
 
				+  /// Scev analysis.
			
 
				+  ScalarEvolution *SE;
			
 
				+  /// Target Library Info.
			
 
				+  TargetLibraryInfo *TLI;
			
 
				+  /// Parent function
			
 
				+  Function *TheFunction;
			
 
				+  /// Target Transform Info
			
 
				+  const TargetTransformInfo *TTI;
			
 
				+  /// Dominator Tree.
			
 
				+  DominatorTree *DT;
			
 
				+  // LoopAccess analysis.
			
 
				+  LoopAccessAnalysis *LAA;
			
 
				+  // And the loop-accesses info corresponding to this loop.  This pointer is
			
 
				+  // null until canVectorizeMemory sets it up.
			
 
				+  const LoopAccessInfo *LAI;
			
 
				 
			
 
				-    // First operand should refer to the loop id itself.
			
 
				-    assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
			
 
				-    assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
			
 
				+  /// The interleave access information contains groups of interleaved accesses
			
 
				+  /// with the same stride and close to each other.
			
 
				+  InterleavedAccessInfo InterleaveInfo;
			
 
				 
			
 
				-    for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
			
 
				-      const MDString *S = nullptr;
			
 
				-      SmallVector<Metadata *, 4> Args;
			
 
				+  //  ---  vectorization state --- //
			
 
				 
			
 
				-      // The expected hint is either a MDString or a MDNode with the first
			
 
				-      // operand a MDString.
			
 
				-      if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
			
 
				-        if (!MD || MD->getNumOperands() == 0)
			
 
				-          continue;
			
 
				-        S = dyn_cast<MDString>(MD->getOperand(0));
			
 
				-        for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
			
 
				-          Args.push_back(MD->getOperand(i));
			
 
				-      } else {
			
 
				-        S = dyn_cast<MDString>(LoopID->getOperand(i));
			
 
				-        assert(Args.size() == 0 && "too many arguments for MDString");
			
 
				-      }
			
 
				+  /// Holds the integer induction variable. This is the counter of the
			
 
				+  /// loop.
			
 
				+  PHINode *Induction;
			
 
				+  /// Holds the reduction variables.
			
 
				+  ReductionList Reductions;
			
 
				+  /// Holds all of the induction variables that we found in the loop.
			
 
				+  /// Notice that inductions don't need to start at zero and that induction
			
 
				+  /// variables can be pointers.
			
 
				+  InductionList Inductions;
			
 
				+  /// Holds the widest induction type encountered.
			
 
				+  Type *WidestIndTy;
			
 
				 
			
 
				-      if (!S)
			
 
				-        continue;
			
 
				+  /// Allowed outside users. This holds the reduction
			
 
				+  /// vars which can be accessed from outside the loop.
			
 
				+  SmallPtrSet<Value*, 4> AllowedExit;
			
 
				+  /// This set holds the variables which are known to be uniform after
			
 
				+  /// vectorization.
			
 
				+  SmallPtrSet<Instruction*, 4> Uniforms;
			
 
				 
			
 
				-      // Check if the hint starts with the loop metadata prefix.
			
 
				-      StringRef Name = S->getString();
			
 
				-      if (Args.size() == 1)
			
 
				-        setHint(Name, Args[0]);
			
 
				-    }
			
 
				-  }
			
 
				+  /// Can we assume the absence of NaNs.
			
 
				+  bool HasFunNoNaNAttr;
			
 
				 
			
 
				-  /// Checks string hint with one operand and set value if valid.
			
 
				-  void setHint(StringRef Name, Metadata *Arg) {
			
 
				-    if (!Name.startswith(Prefix()))
			
 
				-      return;
			
 
				-    Name = Name.substr(Prefix().size(), StringRef::npos);
			
 
				+  /// Vectorization requirements that will go through late-evaluation.
			
 
				+  LoopVectorizationRequirements *Requirements;
			
 
				 
			
 
				-    const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
			
 
				-    if (!C) return;
			
 
				-    unsigned Val = C->getZExtValue();
			
 
				+  ValueToValueMap Strides;
			
 
				+  SmallPtrSet<Value *, 8> StrideSet;
			
 
				 
			
 
				-    Hint *Hints[] = {&Width, &Interleave, &Force};
			
 
				-    for (auto H : Hints) {
			
 
				-      if (Name == H->Name) {
			
 
				-        if (H->validate(Val))
			
 
				-          H->Value = Val;
			
 
				-        else
			
 
				-          DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
			
 
				-        break;
			
 
				-      }
			
 
				-    }
			
 
				-  }
			
 
				+  /// While vectorizing these instructions we have to generate a
			
 
				+  /// call to the appropriate masked intrinsic
			
 
				+  SmallPtrSet<const Instruction*, 8> MaskedOp;
			
 
				+};
			
 
				 
			
 
				-  /// Create a new hint from name / value pair.
			
 
				-  MDNode *createHintMetadata(StringRef Name, unsigned V) const {
			
 
				-    LLVMContext &Context = TheLoop->getHeader()->getContext();
			
 
				-    Metadata *MDs[] = {MDString::get(Context, Name),
			
 
				-                       ConstantAsMetadata::get(
			
 
				-                           ConstantInt::get(Type::getInt32Ty(Context), V))};
			
 
				-    return MDNode::get(Context, MDs);
			
 
				+/// LoopVectorizationCostModel - estimates the expected speedups due to
			
 
				+/// vectorization.
			
 
				+/// In many cases vectorization is not profitable. This can happen because of
			
 
				+/// a number of reasons. In this class we mainly attempt to predict the
			
 
				+/// expected speedup/slowdowns due to the supported instruction set. We use the
			
 
				+/// TargetTransformInfo to query the different backends for the cost of
			
 
				+/// different operations.
			
 
				+class LoopVectorizationCostModel {
			
 
				+public:
			
 
				+  LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
			
 
				+                             LoopVectorizationLegality *Legal,
			
 
				+                             const TargetTransformInfo &TTI,
			
 
				+                             const TargetLibraryInfo *TLI, AssumptionCache *AC,
			
 
				+                             const Function *F, const LoopVectorizeHints *Hints)
			
 
				+      : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI),
			
 
				+        TheFunction(F), Hints(Hints) {
			
 
				+    CodeMetrics::collectEphemeralValues(L, AC, EphValues);
			
 
				   }
			
 
				 
			
 
				-  /// Matches metadata with hint name.
			
 
				-  bool matchesHintMetadataName(MDNode *Node, ArrayRef<Hint> HintTypes) {
			
 
				-    MDString* Name = dyn_cast<MDString>(Node->getOperand(0));
			
 
				-    if (!Name)
			
 
				-      return false;
			
 
				+  /// Information about vectorization costs
			
 
				+  struct VectorizationFactor {
			
 
				+    unsigned Width; // Vector width with best cost
			
 
				+    unsigned Cost; // Cost of the loop with that width
			
 
				+  };
			
 
				+  /// \return The most profitable vectorization factor and the cost of that VF.
			
 
				+  /// This method checks every power of two up to VF. If UserVF is not ZERO
			
 
				+  /// then this vectorization factor will be selected if vectorization is
			
 
				+  /// possible.
			
 
				+  VectorizationFactor selectVectorizationFactor(bool OptForSize);
			
 
				 
			
 
				-    for (auto H : HintTypes)
			
 
				-      if (Name->getString().endswith(H.Name))
			
 
				-        return true;
			
 
				-    return false;
			
 
				-  }
			
 
				+  /// \return The size (in bits) of the widest type in the code that
			
 
				+  /// needs to be vectorized. We ignore values that remain scalar such as
			
 
				+  /// 64 bit loop indices.
			
 
				+  unsigned getWidestType();
			
 
				 
			
 
				-  /// Sets current hints into loop metadata, keeping other values intact.
			
 
				-  void writeHintsToMetadata(ArrayRef<Hint> HintTypes) {
			
 
				-    if (HintTypes.size() == 0)
			
 
				-      return;
			
 
				+  /// \return The desired interleave count.
			
 
				+  /// If interleave count has been specified by metadata it will be returned.
			
 
				+  /// Otherwise, the interleave count is computed and returned. VF and LoopCost
			
 
				+  /// are the selected vectorization factor and the cost of the selected VF.
			
 
				+  unsigned selectInterleaveCount(bool OptForSize, unsigned VF,
			
 
				+                                 unsigned LoopCost);
			
 
				 
			
 
				-    // Reserve the first element to LoopID (see below).
			
 
				-    SmallVector<Metadata *, 4> MDs(1);
			
 
				-    // If the loop already has metadata, then ignore the existing operands.
			
 
				-    MDNode *LoopID = TheLoop->getLoopID();
			
 
				-    if (LoopID) {
			
 
				-      for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
			
 
				-        MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
			
 
				-        // If node in update list, ignore old value.
			
 
				-        if (!matchesHintMetadataName(Node, HintTypes))
			
 
				-          MDs.push_back(Node);
			
 
				-      }
			
 
				-    }
			
 
				+  /// \return The most profitable unroll factor.
			
 
				+  /// This method finds the best unroll-factor based on register pressure and
			
 
				+  /// other parameters. VF and LoopCost are the selected vectorization factor
			
 
				+  /// and the cost of the selected VF.
			
 
				+  unsigned computeInterleaveCount(bool OptForSize, unsigned VF,
			
 
				+                                  unsigned LoopCost);
			
 
				 
			
 
				-    // Now, add the missing hints.
			
 
				-    for (auto H : HintTypes)
			
 
				-      MDs.push_back(createHintMetadata(Twine(Prefix(), H.Name).str(), H.Value));
			
 
				+  /// \brief A struct that represents some properties of the register usage
			
 
				+  /// of a loop.
			
 
				+  struct RegisterUsage {
			
 
				+    /// Holds the number of loop invariant values that are used in the loop.
			
 
				+    unsigned LoopInvariantRegs;
			
 
				+    /// Holds the maximum number of concurrent live intervals in the loop.
			
 
				+    unsigned MaxLocalUsers;
			
 
				+    /// Holds the number of instructions in the loop.
			
 
				+    unsigned NumInstructions;
			
 
				+  };
			
 
				 
			
 
				-    // Replace current metadata node with new one.
			
 
				-    LLVMContext &Context = TheLoop->getHeader()->getContext();
			
 
				-    MDNode *NewLoopID = MDNode::get(Context, MDs);
			
 
				-    // Set operand 0 to refer to the loop id itself.
			
 
				-    NewLoopID->replaceOperandWith(0, NewLoopID);
			
 
				+  /// \return  information about the register usage of the loop.
			
 
				+  RegisterUsage calculateRegisterUsage();
			
 
				 
			
 
				-    TheLoop->setLoopID(NewLoopID);
			
 
				-  }
			
 
				+private:
			
 
				+  /// Returns the expected execution cost. The unit of the cost does
			
 
				+  /// not matter because we use the 'cost' units to compare different
			
 
				+  /// vector widths. The cost that is returned is *not* normalized by
			
 
				+  /// the factor width.
			
 
				+  unsigned expectedCost(unsigned VF);
			
 
				 
			
 
				-  /// The loop these hints belong to.
			
 
				-  const Loop *TheLoop;
			
 
				-};
			
 
				+  /// Returns the execution time cost of an instruction for a given vector
			
 
				+  /// width. Vector width of one means scalar.
			
 
				+  unsigned getInstructionCost(Instruction *I, unsigned VF);
			
 
				 
			
 
				-static void emitMissedWarning(Function *F, Loop *L,
			
 
				-                              const LoopVectorizeHints &LH) {
			
 
				-  emitOptimizationRemarkMissed(F->getContext(), DEBUG_TYPE, *F,
			
 
				-                               L->getStartLoc(), LH.emitRemark());
			
 
				+  /// Returns whether the instruction is a load or store and will be emitted
			
 
				+  /// as a vector operation.
			
 
				+  bool isConsecutiveLoadOrStore(Instruction *I);
			
 
				 
			
 
				-  if (LH.getForce() == LoopVectorizeHints::FK_Enabled) {
			
 
				-    if (LH.getWidth() != 1)
			
 
				-      emitLoopVectorizeWarning(
			
 
				-          F->getContext(), *F, L->getStartLoc(),
			
 
				-          "failed explicitly specified loop vectorization");
			
 
				-    else if (LH.getInterleave() != 1)
			
 
				-      emitLoopInterleaveWarning(
			
 
				-          F->getContext(), *F, L->getStartLoc(),
			
 
				-          "failed explicitly specified loop interleaving");
			
 
				+  /// Report an analysis message to assist the user in diagnosing loops that are
			
 
				+  /// not vectorized.  These are handled as LoopAccessReport rather than
			
 
				+  /// VectorizationReport because the << operator of VectorizationReport returns
			
 
				+  /// LoopAccessReport.
			
 
				+  void emitAnalysis(const LoopAccessReport &Message) {
			
 
				+    LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME);
			
 
				   }
			
 
				-}
			
 
				+
			
 
				+  /// Values used only by @llvm.assume calls.
			
 
				+  SmallPtrSet<const Value *, 32> EphValues;
			
 
				+
			
 
				+  /// The loop that we evaluate.
			
 
				+  Loop *TheLoop;
			
 
				+  /// Scev analysis.
			
 
				+  ScalarEvolution *SE;
			
 
				+  /// Loop Info analysis.
			
 
				+  LoopInfo *LI;
			
 
				+  /// Vectorization legality.
			
 
				+  LoopVectorizationLegality *Legal;
			
 
				+  /// Vector target information.
			
 
				+  const TargetTransformInfo &TTI;
			
 
				+  /// Target Library Info.
			
 
				+  const TargetLibraryInfo *TLI;
			
 
				+  const Function *TheFunction;
			
 
				+  /// Loop Vectorize Hint.
			
 
				+  const LoopVectorizeHints *Hints;
			
 
				+};
			
 
				 
			
 
				 /// \brief This holds vectorization requirements that must be verified late in
			
 
				 /// the process. The requirements are set by legalize and costmodel. Once