|
@@ -217,9 +217,9 @@ static cl::opt<unsigned> MaxNestedScalarReductionIC(
|
|
|
namespace {
|
|
|
|
|
|
// Forward declarations.
|
|
|
+class LoopVectorizeHints;
|
|
|
class LoopVectorizationLegality;
|
|
|
class LoopVectorizationCostModel;
|
|
|
-class LoopVectorizeHints;
|
|
|
class LoopVectorizationRequirements;
|
|
|
|
|
|
/// \brief This modifies LoopAccessReport to initialize message with
|
|
@@ -779,680 +779,680 @@ private:
|
|
|
const ValueToValueMap &Strides);
|
|
|
};
|
|
|
|
|
|
-/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
|
|
|
-/// to what vectorization factor.
|
|
|
-/// This class does not look at the profitability of vectorization, only the
|
|
|
-/// legality. This class has two main kinds of checks:
|
|
|
-/// * Memory checks - The code in canVectorizeMemory checks if vectorization
|
|
|
-/// will change the order of memory accesses in a way that will change the
|
|
|
-/// correctness of the program.
|
|
|
-/// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
|
|
|
-/// checks for a number of different conditions, such as the availability of a
|
|
|
-/// single induction variable, that all types are supported and vectorize-able,
|
|
|
-/// etc. This code reflects the capabilities of InnerLoopVectorizer.
|
|
|
-/// This class is also used by InnerLoopVectorizer for identifying
|
|
|
-/// induction variable and the different reduction variables.
|
|
|
-class LoopVectorizationLegality {
|
|
|
-public:
|
|
|
- LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
|
|
|
- TargetLibraryInfo *TLI, AliasAnalysis *AA,
|
|
|
- Function *F, const TargetTransformInfo *TTI,
|
|
|
- LoopAccessAnalysis *LAA,
|
|
|
- LoopVectorizationRequirements *R)
|
|
|
- : NumPredStores(0), TheLoop(L), SE(SE), TLI(TLI), TheFunction(F),
|
|
|
- TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), InterleaveInfo(SE, L, DT),
|
|
|
- Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false),
|
|
|
- Requirements(R) {}
|
|
|
-
|
|
|
- /// This enum represents the kinds of inductions that we support.
|
|
|
- enum InductionKind {
|
|
|
- IK_NoInduction, ///< Not an induction variable.
|
|
|
- IK_IntInduction, ///< Integer induction variable. Step = C.
|
|
|
- IK_PtrInduction ///< Pointer induction var. Step = C / sizeof(elem).
|
|
|
+/// Utility class for getting and setting loop vectorizer hints in the form
|
|
|
+/// of loop metadata.
|
|
|
+/// This class keeps a number of loop annotations locally (as member variables)
|
|
|
+/// and can, upon request, write them back as metadata on the loop. It will
|
|
|
+/// initially scan the loop for existing metadata, and will update the local
|
|
|
+/// values based on information in the loop.
|
|
|
+/// We cannot write all values to metadata, as the mere presence of some info,
|
|
|
+/// for example 'force', means a decision has been made. So, we need to be
|
|
|
+/// careful NOT to add them if the user hasn't specifically asked so.
|
|
|
+class LoopVectorizeHints {
|
|
|
+ enum HintKind {
|
|
|
+ HK_WIDTH,
|
|
|
+ HK_UNROLL,
|
|
|
+ HK_FORCE
|
|
|
};
|
|
|
|
|
|
- /// A struct for saving information about induction variables.
|
|
|
- struct InductionInfo {
|
|
|
- InductionInfo(Value *Start, InductionKind K, ConstantInt *Step)
|
|
|
- : StartValue(Start), IK(K), StepValue(Step) {
|
|
|
- assert(IK != IK_NoInduction && "Not an induction");
|
|
|
- assert(StartValue && "StartValue is null");
|
|
|
- assert(StepValue && !StepValue->isZero() && "StepValue is zero");
|
|
|
- assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
|
|
|
- "StartValue is not a pointer for pointer induction");
|
|
|
- assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
|
|
|
- "StartValue is not an integer for integer induction");
|
|
|
- assert(StepValue->getType()->isIntegerTy() &&
|
|
|
- "StepValue is not an integer");
|
|
|
- }
|
|
|
- InductionInfo()
|
|
|
- : StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {}
|
|
|
-
|
|
|
- /// Get the consecutive direction. Returns:
|
|
|
- /// 0 - unknown or non-consecutive.
|
|
|
- /// 1 - consecutive and increasing.
|
|
|
- /// -1 - consecutive and decreasing.
|
|
|
- int getConsecutiveDirection() const {
|
|
|
- if (StepValue && (StepValue->isOne() || StepValue->isMinusOne()))
|
|
|
- return StepValue->getSExtValue();
|
|
|
- return 0;
|
|
|
- }
|
|
|
-
|
|
|
- /// Compute the transformed value of Index at offset StartValue using step
|
|
|
- /// StepValue.
|
|
|
- /// For integer induction, returns StartValue + Index * StepValue.
|
|
|
- /// For pointer induction, returns StartValue[Index * StepValue].
|
|
|
- /// FIXME: The newly created binary instructions should contain nsw/nuw
|
|
|
- /// flags, which can be found from the original scalar operations.
|
|
|
- Value *transform(IRBuilder<> &B, Value *Index) const {
|
|
|
- switch (IK) {
|
|
|
- case IK_IntInduction:
|
|
|
- assert(Index->getType() == StartValue->getType() &&
|
|
|
- "Index type does not match StartValue type");
|
|
|
- if (StepValue->isMinusOne())
|
|
|
- return B.CreateSub(StartValue, Index);
|
|
|
- if (!StepValue->isOne())
|
|
|
- Index = B.CreateMul(Index, StepValue);
|
|
|
- return B.CreateAdd(StartValue, Index);
|
|
|
+ /// Hint - associates name and validation with the hint value.
|
|
|
+ struct Hint {
|
|
|
+ const char * Name;
|
|
|
+ unsigned Value; // This may have to change for non-numeric values.
|
|
|
+ HintKind Kind;
|
|
|
|
|
|
- case IK_PtrInduction:
|
|
|
- assert(Index->getType() == StepValue->getType() &&
|
|
|
- "Index type does not match StepValue type");
|
|
|
- if (StepValue->isMinusOne())
|
|
|
- Index = B.CreateNeg(Index);
|
|
|
- else if (!StepValue->isOne())
|
|
|
- Index = B.CreateMul(Index, StepValue);
|
|
|
- return B.CreateGEP(nullptr, StartValue, Index);
|
|
|
+ Hint(const char * Name, unsigned Value, HintKind Kind)
|
|
|
+ : Name(Name), Value(Value), Kind(Kind) { }
|
|
|
|
|
|
- case IK_NoInduction:
|
|
|
- return nullptr;
|
|
|
+ bool validate(unsigned Val) {
|
|
|
+ switch (Kind) {
|
|
|
+ case HK_WIDTH:
|
|
|
+ return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
|
|
|
+ case HK_UNROLL:
|
|
|
+ return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
|
|
|
+ case HK_FORCE:
|
|
|
+ return (Val <= 1);
|
|
|
}
|
|
|
- llvm_unreachable("invalid enum");
|
|
|
+ return false;
|
|
|
}
|
|
|
-
|
|
|
- /// Start value.
|
|
|
- TrackingVH<Value> StartValue;
|
|
|
- /// Induction kind.
|
|
|
- InductionKind IK;
|
|
|
- /// Step value.
|
|
|
- ConstantInt *StepValue;
|
|
|
};
|
|
|
|
|
|
- /// ReductionList contains the reduction descriptors for all
|
|
|
- /// of the reductions that were found in the loop.
|
|
|
- typedef DenseMap<PHINode *, RecurrenceDescriptor> ReductionList;
|
|
|
-
|
|
|
- /// InductionList saves induction variables and maps them to the
|
|
|
- /// induction descriptor.
|
|
|
- typedef MapVector<PHINode*, InductionInfo> InductionList;
|
|
|
-
|
|
|
- /// Returns true if it is legal to vectorize this loop.
|
|
|
- /// This does not mean that it is profitable to vectorize this
|
|
|
- /// loop, only that it is legal to do so.
|
|
|
- bool canVectorize();
|
|
|
+ /// Vectorization width.
|
|
|
+ Hint Width;
|
|
|
+ /// Vectorization interleave factor.
|
|
|
+ Hint Interleave;
|
|
|
+ /// Vectorization forced
|
|
|
+ Hint Force;
|
|
|
|
|
|
- /// Returns the Induction variable.
|
|
|
- PHINode *getInduction() { return Induction; }
|
|
|
+ /// Return the loop metadata prefix.
|
|
|
+ static StringRef Prefix() { return "llvm.loop."; }
|
|
|
|
|
|
- /// Returns the reduction variables found in the loop.
|
|
|
- ReductionList *getReductionVars() { return &Reductions; }
|
|
|
+public:
|
|
|
+ enum ForceKind {
|
|
|
+ FK_Undefined = -1, ///< Not selected.
|
|
|
+ FK_Disabled = 0, ///< Forcing disabled.
|
|
|
+ FK_Enabled = 1, ///< Forcing enabled.
|
|
|
+ };
|
|
|
|
|
|
- /// Returns the induction variables found in the loop.
|
|
|
- InductionList *getInductionVars() { return &Inductions; }
|
|
|
+ LoopVectorizeHints(const Loop *L, bool DisableInterleaving)
|
|
|
+ : Width("vectorize.width", VectorizerParams::VectorizationFactor,
|
|
|
+ HK_WIDTH),
|
|
|
+ Interleave("interleave.count", DisableInterleaving, HK_UNROLL),
|
|
|
+ Force("vectorize.enable", FK_Undefined, HK_FORCE),
|
|
|
+ TheLoop(L) {
|
|
|
+ // Populate values with existing loop metadata.
|
|
|
+ getHintsFromMetadata();
|
|
|
|
|
|
- /// Returns the widest induction type.
|
|
|
- Type *getWidestInductionType() { return WidestIndTy; }
|
|
|
+ // force-vector-interleave overrides DisableInterleaving.
|
|
|
+ if (VectorizerParams::isInterleaveForced())
|
|
|
+ Interleave.Value = VectorizerParams::VectorizationInterleave;
|
|
|
|
|
|
- /// Returns True if V is an induction variable in this loop.
|
|
|
- bool isInductionVariable(const Value *V);
|
|
|
+ DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs()
|
|
|
+ << "LV: Interleaving disabled by the pass manager\n");
|
|
|
+ }
|
|
|
|
|
|
- /// Return true if the block BB needs to be predicated in order for the loop
|
|
|
- /// to be vectorized.
|
|
|
- bool blockNeedsPredication(BasicBlock *BB);
|
|
|
+ /// Mark the loop as already vectorized: set width and interleave count to 1.
|
|
|
+ void setAlreadyVectorized() {
|
|
|
+ Width.Value = Interleave.Value = 1;
|
|
|
+ Hint Hints[] = {Width, Interleave};
|
|
|
+ writeHintsToMetadata(Hints);
|
|
|
+ }
|
|
|
|
|
|
- /// Check if this pointer is consecutive when vectorizing. This happens
|
|
|
- /// when the last index of the GEP is the induction variable, or that the
|
|
|
- /// pointer itself is an induction variable.
|
|
|
- /// This check allows us to vectorize A[idx] into a wide load/store.
|
|
|
- /// Returns:
|
|
|
- /// 0 - Stride is unknown or non-consecutive.
|
|
|
- /// 1 - Address is consecutive.
|
|
|
- /// -1 - Address is consecutive, and decreasing.
|
|
|
- int isConsecutivePtr(Value *Ptr);
|
|
|
+ bool allowVectorization(Function *F, Loop *L, bool AlwaysVectorize) const {
|
|
|
+ if (getForce() == LoopVectorizeHints::FK_Disabled) {
|
|
|
+ DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
|
|
|
+ emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F,
|
|
|
+ L->getStartLoc(), emitRemark());
|
|
|
+ return false;
|
|
|
+ }
|
|
|
|
|
|
- /// Returns true if the value V is uniform within the loop.
|
|
|
- bool isUniform(Value *V);
|
|
|
+ if (!AlwaysVectorize && getForce() != LoopVectorizeHints::FK_Enabled) {
|
|
|
+ DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
|
|
|
+ emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F,
|
|
|
+ L->getStartLoc(), emitRemark());
|
|
|
+ return false;
|
|
|
+ }
|
|
|
|
|
|
- /// Returns true if this instruction will remain scalar after vectorization.
|
|
|
- bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); }
|
|
|
+ if (getWidth() == 1 && getInterleave() == 1) {
|
|
|
+ // FIXME: Add a separate metadata to indicate when the loop has already
|
|
|
+ // been vectorized instead of setting width and count to 1.
|
|
|
+ DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
|
|
|
+ // FIXME: Add interleave.disable metadata. This will allow
|
|
|
+ // vectorize.disable to be used without disabling the pass and errors
|
|
|
+ // to differentiate between disabled vectorization and a width of 1.
|
|
|
+ emitOptimizationRemarkAnalysis(
|
|
|
+ F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
|
|
|
+ "loop not vectorized: vectorization and interleaving are explicitly "
|
|
|
+ "disabled, or vectorize width and interleave count are both set to "
|
|
|
+ "1");
|
|
|
+ return false;
|
|
|
+ }
|
|
|
|
|
|
- /// Returns the information that we collected about runtime memory check.
|
|
|
- const RuntimePointerChecking *getRuntimePointerChecking() const {
|
|
|
- return LAI->getRuntimePointerChecking();
|
|
|
+ return true;
|
|
|
}
|
|
|
|
|
|
- const LoopAccessInfo *getLAI() const {
|
|
|
- return LAI;
|
|
|
- }
|
|
|
+ /// Builds and returns the remark text describing the current hints.
|
|
|
+ std::string emitRemark() const {
|
|
|
+ VectorizationReport R;
|
|
|
+ if (Force.Value == LoopVectorizeHints::FK_Disabled)
|
|
|
+ R << "vectorization is explicitly disabled";
|
|
|
+ else {
|
|
|
+ R << "use -Rpass-analysis=loop-vectorize for more info";
|
|
|
+ if (Force.Value == LoopVectorizeHints::FK_Enabled) {
|
|
|
+ R << " (Force=true";
|
|
|
+ if (Width.Value != 0)
|
|
|
+ R << ", Vector Width=" << Width.Value;
|
|
|
+ if (Interleave.Value != 0)
|
|
|
+ R << ", Interleave Count=" << Interleave.Value;
|
|
|
+ R << ")";
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- /// \brief Check if \p Instr belongs to any interleaved access group.
|
|
|
- bool isAccessInterleaved(Instruction *Instr) {
|
|
|
- return InterleaveInfo.isInterleaved(Instr);
|
|
|
+ return R.str();
|
|
|
}
|
|
|
|
|
|
- /// \brief Get the interleaved access group that \p Instr belongs to.
|
|
|
- const InterleaveGroup *getInterleavedAccessGroup(Instruction *Instr) {
|
|
|
- return InterleaveInfo.getInterleaveGroup(Instr);
|
|
|
- }
|
|
|
+ unsigned getWidth() const { return Width.Value; }
|
|
|
+ unsigned getInterleave() const { return Interleave.Value; }
|
|
|
+ enum ForceKind getForce() const { return (ForceKind)Force.Value; }
|
|
|
|
|
|
- unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
|
|
|
+private:
|
|
|
+ /// Find hints specified in the loop metadata and update local values.
|
|
|
+ void getHintsFromMetadata() {
|
|
|
+ MDNode *LoopID = TheLoop->getLoopID();
|
|
|
+ if (!LoopID)
|
|
|
+ return;
|
|
|
|
|
|
- bool hasStride(Value *V) { return StrideSet.count(V); }
|
|
|
- bool mustCheckStrides() { return !StrideSet.empty(); }
|
|
|
- SmallPtrSet<Value *, 8>::iterator strides_begin() {
|
|
|
- return StrideSet.begin();
|
|
|
- }
|
|
|
- SmallPtrSet<Value *, 8>::iterator strides_end() { return StrideSet.end(); }
|
|
|
+ // First operand should refer to the loop id itself.
|
|
|
+ assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
|
|
|
+ assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
|
|
|
|
|
|
- /// Returns true if the target machine supports masked store operation
|
|
|
- /// for the given \p DataType and kind of access to \p Ptr.
|
|
|
- bool isLegalMaskedStore(Type *DataType, Value *Ptr) {
|
|
|
- return TTI->isLegalMaskedStore(DataType, isConsecutivePtr(Ptr));
|
|
|
- }
|
|
|
- /// Returns true if the target machine supports masked load operation
|
|
|
- /// for the given \p DataType and kind of access to \p Ptr.
|
|
|
- bool isLegalMaskedLoad(Type *DataType, Value *Ptr) {
|
|
|
- return TTI->isLegalMaskedLoad(DataType, isConsecutivePtr(Ptr));
|
|
|
- }
|
|
|
- /// Returns true if vector representation of the instruction \p I
|
|
|
- /// requires mask.
|
|
|
- bool isMaskRequired(const Instruction* I) {
|
|
|
- return (MaskedOp.count(I) != 0);
|
|
|
- }
|
|
|
- unsigned getNumStores() const {
|
|
|
- return LAI->getNumStores();
|
|
|
- }
|
|
|
- unsigned getNumLoads() const {
|
|
|
- return LAI->getNumLoads();
|
|
|
- }
|
|
|
- unsigned getNumPredStores() const {
|
|
|
- return NumPredStores;
|
|
|
- }
|
|
|
-private:
|
|
|
- /// Check if a single basic block loop is vectorizable.
|
|
|
- /// At this point we know that this is a loop with a constant trip count
|
|
|
- /// and we only need to check individual instructions.
|
|
|
- bool canVectorizeInstrs();
|
|
|
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
|
|
|
+ const MDString *S = nullptr;
|
|
|
+ SmallVector<Metadata *, 4> Args;
|
|
|
|
|
|
- /// When we vectorize loops we may change the order in which
|
|
|
- /// we read and write from memory. This method checks if it is
|
|
|
- /// legal to vectorize the code, considering only memory constrains.
|
|
|
- /// Returns true if the loop is vectorizable
|
|
|
- bool canVectorizeMemory();
|
|
|
+ // The expected hint is either a MDString or a MDNode with the first
|
|
|
+ // operand a MDString.
|
|
|
+ if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
|
|
|
+ if (!MD || MD->getNumOperands() == 0)
|
|
|
+ continue;
|
|
|
+ S = dyn_cast<MDString>(MD->getOperand(0));
|
|
|
+ for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
|
|
|
+ Args.push_back(MD->getOperand(i));
|
|
|
+ } else {
|
|
|
+ S = dyn_cast<MDString>(LoopID->getOperand(i));
|
|
|
+ assert(Args.size() == 0 && "too many arguments for MDString");
|
|
|
+ }
|
|
|
|
|
|
- /// Return true if we can vectorize this loop using the IF-conversion
|
|
|
- /// transformation.
|
|
|
- bool canVectorizeWithIfConvert();
|
|
|
+ if (!S)
|
|
|
+ continue;
|
|
|
|
|
|
- /// Collect the variables that need to stay uniform after vectorization.
|
|
|
- void collectLoopUniforms();
|
|
|
+ // Pass single-argument hints to setHint, which checks the metadata prefix.
|
|
|
+ StringRef Name = S->getString();
|
|
|
+ if (Args.size() == 1)
|
|
|
+ setHint(Name, Args[0]);
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- /// Return true if all of the instructions in the block can be speculatively
|
|
|
- /// executed. \p SafePtrs is a list of addresses that are known to be legal
|
|
|
- /// and we know that we can read from them without segfault.
|
|
|
- bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs);
|
|
|
+ /// Checks a string hint with one operand and sets its value if valid.
|
|
|
+ void setHint(StringRef Name, Metadata *Arg) {
|
|
|
+ if (!Name.startswith(Prefix()))
|
|
|
+ return;
|
|
|
+ Name = Name.substr(Prefix().size(), StringRef::npos);
|
|
|
|
|
|
- /// Returns the induction kind of Phi and record the step. This function may
|
|
|
- /// return NoInduction if the PHI is not an induction variable.
|
|
|
- InductionKind isInductionVariable(PHINode *Phi, ConstantInt *&StepValue);
|
|
|
+ const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
|
|
|
+ if (!C) return;
|
|
|
+ unsigned Val = C->getZExtValue();
|
|
|
|
|
|
- /// \brief Collect memory access with loop invariant strides.
|
|
|
- ///
|
|
|
- /// Looks for accesses like "a[i * StrideA]" where "StrideA" is loop
|
|
|
- /// invariant.
|
|
|
- void collectStridedAccess(Value *LoadOrStoreInst);
|
|
|
+ Hint *Hints[] = {&Width, &Interleave, &Force};
|
|
|
+ for (auto H : Hints) {
|
|
|
+ if (Name == H->Name) {
|
|
|
+ if (H->validate(Val))
|
|
|
+ H->Value = Val;
|
|
|
+ else
|
|
|
+ DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
|
|
|
+ break;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- /// Report an analysis message to assist the user in diagnosing loops that are
|
|
|
- /// not vectorized. These are handled as LoopAccessReport rather than
|
|
|
- /// VectorizationReport because the << operator of VectorizationReport returns
|
|
|
- /// LoopAccessReport.
|
|
|
- void emitAnalysis(const LoopAccessReport &Message) {
|
|
|
- LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME);
|
|
|
+ /// Create a new hint from name / value pair.
|
|
|
+ MDNode *createHintMetadata(StringRef Name, unsigned V) const {
|
|
|
+ LLVMContext &Context = TheLoop->getHeader()->getContext();
|
|
|
+ Metadata *MDs[] = {MDString::get(Context, Name),
|
|
|
+ ConstantAsMetadata::get(
|
|
|
+ ConstantInt::get(Type::getInt32Ty(Context), V))};
|
|
|
+ return MDNode::get(Context, MDs);
|
|
|
}
|
|
|
|
|
|
- unsigned NumPredStores;
|
|
|
+ /// Returns true if Node's name string ends with one of the given hint names.
|
|
|
+ bool matchesHintMetadataName(MDNode *Node, ArrayRef<Hint> HintTypes) {
|
|
|
+ MDString* Name = dyn_cast<MDString>(Node->getOperand(0));
|
|
|
+ if (!Name)
|
|
|
+ return false;
|
|
|
|
|
|
- /// The loop that we evaluate.
|
|
|
- Loop *TheLoop;
|
|
|
- /// Scev analysis.
|
|
|
- ScalarEvolution *SE;
|
|
|
- /// Target Library Info.
|
|
|
- TargetLibraryInfo *TLI;
|
|
|
- /// Parent function
|
|
|
- Function *TheFunction;
|
|
|
- /// Target Transform Info
|
|
|
- const TargetTransformInfo *TTI;
|
|
|
- /// Dominator Tree.
|
|
|
- DominatorTree *DT;
|
|
|
- // LoopAccess analysis.
|
|
|
- LoopAccessAnalysis *LAA;
|
|
|
- // And the loop-accesses info corresponding to this loop. This pointer is
|
|
|
- // null until canVectorizeMemory sets it up.
|
|
|
- const LoopAccessInfo *LAI;
|
|
|
+ for (auto H : HintTypes)
|
|
|
+ if (Name->getString().endswith(H.Name))
|
|
|
+ return true;
|
|
|
+ return false;
|
|
|
+ }
|
|
|
|
|
|
- /// The interleave access information contains groups of interleaved accesses
|
|
|
- /// with the same stride and close to each other.
|
|
|
- InterleavedAccessInfo InterleaveInfo;
|
|
|
+ /// Sets current hints into loop metadata, keeping other values intact.
|
|
|
+ void writeHintsToMetadata(ArrayRef<Hint> HintTypes) {
|
|
|
+ if (HintTypes.size() == 0)
|
|
|
+ return;
|
|
|
|
|
|
- // --- vectorization state --- //
|
|
|
+ // Reserve the first element to LoopID (see below).
|
|
|
+ SmallVector<Metadata *, 4> MDs(1);
|
|
|
+ // If the loop already has metadata, keep the operands not being updated.
|
|
|
+ MDNode *LoopID = TheLoop->getLoopID();
|
|
|
+ if (LoopID) {
|
|
|
+ for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
|
|
|
+ MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
|
|
|
+ // If node in update list, ignore old value.
|
|
|
+ if (!matchesHintMetadataName(Node, HintTypes))
|
|
|
+ MDs.push_back(Node);
|
|
|
+ }
|
|
|
+ }
|
|
|
|
|
|
- /// Holds the integer induction variable. This is the counter of the
|
|
|
- /// loop.
|
|
|
- PHINode *Induction;
|
|
|
- /// Holds the reduction variables.
|
|
|
- ReductionList Reductions;
|
|
|
- /// Holds all of the induction variables that we found in the loop.
|
|
|
- /// Notice that inductions don't need to start at zero and that induction
|
|
|
- /// variables can be pointers.
|
|
|
- InductionList Inductions;
|
|
|
- /// Holds the widest induction type encountered.
|
|
|
- Type *WidestIndTy;
|
|
|
+ // Now, add the missing hints.
|
|
|
+ for (auto H : HintTypes)
|
|
|
+ MDs.push_back(createHintMetadata(Twine(Prefix(), H.Name).str(), H.Value));
|
|
|
|
|
|
- /// Allowed outside users. This holds the reduction
|
|
|
- /// vars which can be accessed from outside the loop.
|
|
|
- SmallPtrSet<Value*, 4> AllowedExit;
|
|
|
- /// This set holds the variables which are known to be uniform after
|
|
|
- /// vectorization.
|
|
|
- SmallPtrSet<Instruction*, 4> Uniforms;
|
|
|
+ // Replace current metadata node with new one.
|
|
|
+ LLVMContext &Context = TheLoop->getHeader()->getContext();
|
|
|
+ MDNode *NewLoopID = MDNode::get(Context, MDs);
|
|
|
+ // Set operand 0 to refer to the loop id itself.
|
|
|
+ NewLoopID->replaceOperandWith(0, NewLoopID);
|
|
|
|
|
|
- /// Can we assume the absence of NaNs.
|
|
|
- bool HasFunNoNaNAttr;
|
|
|
+ TheLoop->setLoopID(NewLoopID);
|
|
|
+ }
|
|
|
|
|
|
- /// Vectorization requirements that will go through late-evaluation.
|
|
|
- LoopVectorizationRequirements *Requirements;
|
|
|
+ /// The loop these hints belong to.
|
|
|
+ const Loop *TheLoop;
|
|
|
+};
|
|
|
|
|
|
- ValueToValueMap Strides;
|
|
|
- SmallPtrSet<Value *, 8> StrideSet;
|
|
|
+static void emitMissedWarning(Function *F, Loop *L,
|
|
|
+ const LoopVectorizeHints &LH) {
|
|
|
+ emitOptimizationRemarkMissed(F->getContext(), DEBUG_TYPE, *F,
|
|
|
+ L->getStartLoc(), LH.emitRemark());
|
|
|
|
|
|
- /// While vectorizing these instructions we have to generate a
|
|
|
- /// call to the appropriate masked intrinsic
|
|
|
- SmallPtrSet<const Instruction*, 8> MaskedOp;
|
|
|
-};
|
|
|
+ if (LH.getForce() == LoopVectorizeHints::FK_Enabled) {
|
|
|
+ if (LH.getWidth() != 1)
|
|
|
+ emitLoopVectorizeWarning(
|
|
|
+ F->getContext(), *F, L->getStartLoc(),
|
|
|
+ "failed explicitly specified loop vectorization");
|
|
|
+ else if (LH.getInterleave() != 1)
|
|
|
+ emitLoopInterleaveWarning(
|
|
|
+ F->getContext(), *F, L->getStartLoc(),
|
|
|
+ "failed explicitly specified loop interleaving");
|
|
|
+ }
|
|
|
+}
|
|
|
|
|
|
-/// LoopVectorizationCostModel - estimates the expected speedups due to
|
|
|
-/// vectorization.
|
|
|
-/// In many cases vectorization is not profitable. This can happen because of
|
|
|
-/// a number of reasons. In this class we mainly attempt to predict the
|
|
|
-/// expected speedup/slowdowns due to the supported instruction set. We use the
|
|
|
-/// TargetTransformInfo to query the different backends for the cost of
|
|
|
-/// different operations.
|
|
|
-class LoopVectorizationCostModel {
|
|
|
+/// LoopVectorizationLegality checks if it is legal to vectorize a loop, and
|
|
|
+/// to what vectorization factor.
|
|
|
+/// This class does not look at the profitability of vectorization, only the
|
|
|
+/// legality. This class has two main kinds of checks:
|
|
|
+/// * Memory checks - The code in canVectorizeMemory checks if vectorization
|
|
|
+/// will change the order of memory accesses in a way that will change the
|
|
|
+/// correctness of the program.
|
|
|
+/// * Scalars checks - The code in canVectorizeInstrs and canVectorizeMemory
|
|
|
+/// checks for a number of different conditions, such as the availability of a
|
|
|
+/// single induction variable, that all types are supported and vectorize-able,
|
|
|
+/// etc. This code reflects the capabilities of InnerLoopVectorizer.
|
|
|
+/// This class is also used by InnerLoopVectorizer for identifying
|
|
|
+/// induction variable and the different reduction variables.
|
|
|
+class LoopVectorizationLegality {
|
|
|
public:
|
|
|
- LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
|
|
|
- LoopVectorizationLegality *Legal,
|
|
|
- const TargetTransformInfo &TTI,
|
|
|
- const TargetLibraryInfo *TLI, AssumptionCache *AC,
|
|
|
- const Function *F, const LoopVectorizeHints *Hints)
|
|
|
- : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI),
|
|
|
- TheFunction(F), Hints(Hints) {
|
|
|
- CodeMetrics::collectEphemeralValues(L, AC, EphValues);
|
|
|
- }
|
|
|
+ LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DominatorTree *DT,
|
|
|
+ TargetLibraryInfo *TLI, AliasAnalysis *AA,
|
|
|
+ Function *F, const TargetTransformInfo *TTI,
|
|
|
+ LoopAccessAnalysis *LAA,
|
|
|
+ LoopVectorizationRequirements *R)
|
|
|
+ : NumPredStores(0), TheLoop(L), SE(SE), TLI(TLI), TheFunction(F),
|
|
|
+ TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), InterleaveInfo(SE, L, DT),
|
|
|
+ Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false),
|
|
|
+ Requirements(R) {}
|
|
|
|
|
|
- /// Information about vectorization costs
|
|
|
- struct VectorizationFactor {
|
|
|
- unsigned Width; // Vector width with best cost
|
|
|
- unsigned Cost; // Cost of the loop with that width
|
|
|
+ /// This enum represents the kinds of inductions that we support.
|
|
|
+ enum InductionKind {
|
|
|
+ IK_NoInduction, ///< Not an induction variable.
|
|
|
+ IK_IntInduction, ///< Integer induction variable. Step = C.
|
|
|
+ IK_PtrInduction ///< Pointer induction var. Step = C / sizeof(elem).
|
|
|
};
|
|
|
- /// \return The most profitable vectorization factor and the cost of that VF.
|
|
|
- /// This method checks every power of two up to VF. If UserVF is not ZERO
|
|
|
- /// then this vectorization factor will be selected if vectorization is
|
|
|
- /// possible.
|
|
|
- VectorizationFactor selectVectorizationFactor(bool OptForSize);
|
|
|
-
|
|
|
- /// \return The size (in bits) of the widest type in the code that
|
|
|
- /// needs to be vectorized. We ignore values that remain scalar such as
|
|
|
- /// 64 bit loop indices.
|
|
|
- unsigned getWidestType();
|
|
|
|
|
|
- /// \return The desired interleave count.
|
|
|
- /// If interleave count has been specified by metadata it will be returned.
|
|
|
- /// Otherwise, the interleave count is computed and returned. VF and LoopCost
|
|
|
- /// are the selected vectorization factor and the cost of the selected VF.
|
|
|
- unsigned selectInterleaveCount(bool OptForSize, unsigned VF,
|
|
|
- unsigned LoopCost);
|
|
|
+ /// A struct for saving information about induction variables.
|
|
|
+ struct InductionInfo {
|
|
|
+ InductionInfo(Value *Start, InductionKind K, ConstantInt *Step)
|
|
|
+ : StartValue(Start), IK(K), StepValue(Step) {
|
|
|
+ assert(IK != IK_NoInduction && "Not an induction");
|
|
|
+ assert(StartValue && "StartValue is null");
|
|
|
+ assert(StepValue && !StepValue->isZero() && "StepValue is zero");
|
|
|
+ assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
|
|
|
+ "StartValue is not a pointer for pointer induction");
|
|
|
+ assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
|
|
|
+ "StartValue is not an integer for integer induction");
|
|
|
+ assert(StepValue->getType()->isIntegerTy() &&
|
|
|
+ "StepValue is not an integer");
|
|
|
+ }
|
|
|
+ InductionInfo()
|
|
|
+ : StartValue(nullptr), IK(IK_NoInduction), StepValue(nullptr) {}
|
|
|
|
|
|
- /// \return The most profitable unroll factor.
|
|
|
- /// This method finds the best unroll-factor based on register pressure and
|
|
|
- /// other parameters. VF and LoopCost are the selected vectorization factor
|
|
|
- /// and the cost of the selected VF.
|
|
|
- unsigned computeInterleaveCount(bool OptForSize, unsigned VF,
|
|
|
- unsigned LoopCost);
|
|
|
+ /// Get the consecutive direction. Returns:
|
|
|
+ /// 0 - unknown or non-consecutive.
|
|
|
+ /// 1 - consecutive and increasing.
|
|
|
+ /// -1 - consecutive and decreasing.
|
|
|
+ int getConsecutiveDirection() const {
|
|
|
+ if (StepValue && (StepValue->isOne() || StepValue->isMinusOne()))
|
|
|
+ return StepValue->getSExtValue();
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
|
|
|
- /// \brief A struct that represents some properties of the register usage
|
|
|
- /// of a loop.
|
|
|
- struct RegisterUsage {
|
|
|
- /// Holds the number of loop invariant values that are used in the loop.
|
|
|
- unsigned LoopInvariantRegs;
|
|
|
- /// Holds the maximum number of concurrent live intervals in the loop.
|
|
|
- unsigned MaxLocalUsers;
|
|
|
- /// Holds the number of instructions in the loop.
|
|
|
- unsigned NumInstructions;
|
|
|
- };
|
|
|
+ /// Compute the transformed value of Index at offset StartValue using step
|
|
|
+ /// StepValue.
|
|
|
+ /// For integer induction, returns StartValue + Index * StepValue.
|
|
|
+ /// For pointer induction, returns StartValue[Index * StepValue].
|
|
|
+ /// FIXME: The newly created binary instructions should contain nsw/nuw
|
|
|
+ /// flags, which can be found from the original scalar operations.
|
|
|
+ Value *transform(IRBuilder<> &B, Value *Index) const {
|
|
|
+ switch (IK) {
|
|
|
+ case IK_IntInduction:
|
|
|
+ assert(Index->getType() == StartValue->getType() &&
|
|
|
+ "Index type does not match StartValue type");
|
|
|
+ if (StepValue->isMinusOne())
|
|
|
+ return B.CreateSub(StartValue, Index);
|
|
|
+ if (!StepValue->isOne())
|
|
|
+ Index = B.CreateMul(Index, StepValue);
|
|
|
+ return B.CreateAdd(StartValue, Index);
|
|
|
|
|
|
- /// \return information about the register usage of the loop.
|
|
|
- RegisterUsage calculateRegisterUsage();
|
|
|
+ case IK_PtrInduction:
|
|
|
+ assert(Index->getType() == StepValue->getType() &&
|
|
|
+ "Index type does not match StepValue type");
|
|
|
+ if (StepValue->isMinusOne())
|
|
|
+ Index = B.CreateNeg(Index);
|
|
|
+ else if (!StepValue->isOne())
|
|
|
+ Index = B.CreateMul(Index, StepValue);
|
|
|
+ return B.CreateGEP(nullptr, StartValue, Index);
|
|
|
|
|
|
-private:
|
|
|
- /// Returns the expected execution cost. The unit of the cost does
|
|
|
- /// not matter because we use the 'cost' units to compare different
|
|
|
- /// vector widths. The cost that is returned is *not* normalized by
|
|
|
- /// the factor width.
|
|
|
- unsigned expectedCost(unsigned VF);
|
|
|
+ case IK_NoInduction:
|
|
|
+ return nullptr;
|
|
|
+ }
|
|
|
+ llvm_unreachable("invalid enum");
|
|
|
+ }
|
|
|
|
|
|
- /// Returns the execution time cost of an instruction for a given vector
|
|
|
- /// width. Vector width of one means scalar.
|
|
|
- unsigned getInstructionCost(Instruction *I, unsigned VF);
|
|
|
+ /// Start value.
|
|
|
+ TrackingVH<Value> StartValue;
|
|
|
+ /// Induction kind.
|
|
|
+ InductionKind IK;
|
|
|
+ /// Step value.
|
|
|
+ ConstantInt *StepValue;
|
|
|
+ };
|
|
|
|
|
|
- /// Returns whether the instruction is a load or store and will be a emitted
|
|
|
- /// as a vector operation.
|
|
|
- bool isConsecutiveLoadOrStore(Instruction *I);
|
|
|
+ /// ReductionList contains the reduction descriptors for all
|
|
|
+ /// of the reductions that were found in the loop.
|
|
|
+ typedef DenseMap<PHINode *, RecurrenceDescriptor> ReductionList;
|
|
|
|
|
|
- /// Report an analysis message to assist the user in diagnosing loops that are
|
|
|
- /// not vectorized. These are handled as LoopAccessReport rather than
|
|
|
- /// VectorizationReport because the << operator of VectorizationReport returns
|
|
|
- /// LoopAccessReport.
|
|
|
- void emitAnalysis(const LoopAccessReport &Message) {
|
|
|
- LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME);
|
|
|
- }
|
|
|
+ /// InductionList saves induction variables and maps them to the
|
|
|
+ /// induction descriptor.
|
|
|
+ typedef MapVector<PHINode*, InductionInfo> InductionList;
|
|
|
|
|
|
- /// Values used only by @llvm.assume calls.
|
|
|
- SmallPtrSet<const Value *, 32> EphValues;
|
|
|
+ /// Returns true if it is legal to vectorize this loop.
|
|
|
+ /// This does not mean that it is profitable to vectorize this
|
|
|
+ /// loop, only that it is legal to do so.
|
|
|
+ bool canVectorize();
|
|
|
|
|
|
- /// The loop that we evaluate.
|
|
|
- Loop *TheLoop;
|
|
|
- /// Scev analysis.
|
|
|
- ScalarEvolution *SE;
|
|
|
- /// Loop Info analysis.
|
|
|
- LoopInfo *LI;
|
|
|
- /// Vectorization legality.
|
|
|
- LoopVectorizationLegality *Legal;
|
|
|
- /// Vector target information.
|
|
|
- const TargetTransformInfo &TTI;
|
|
|
- /// Target Library Info.
|
|
|
- const TargetLibraryInfo *TLI;
|
|
|
- const Function *TheFunction;
|
|
|
- // Loop Vectorize Hint.
|
|
|
- const LoopVectorizeHints *Hints;
|
|
|
-};
|
|
|
+ /// Returns the Induction variable.
|
|
|
+ PHINode *getInduction() { return Induction; }
|
|
|
|
|
|
-/// Utility class for getting and setting loop vectorizer hints in the form
|
|
|
-/// of loop metadata.
|
|
|
-/// This class keeps a number of loop annotations locally (as member variables)
|
|
|
-/// and can, upon request, write them back as metadata on the loop. It will
|
|
|
-/// initially scan the loop for existing metadata, and will update the local
|
|
|
-/// values based on information in the loop.
|
|
|
-/// We cannot write all values to metadata, as the mere presence of some info,
|
|
|
-/// for example 'force', means a decision has been made. So, we need to be
|
|
|
-/// careful NOT to add them if the user hasn't specifically asked so.
|
|
|
-class LoopVectorizeHints {
|
|
|
- enum HintKind {
|
|
|
- HK_WIDTH,
|
|
|
- HK_UNROLL,
|
|
|
- HK_FORCE
|
|
|
- };
|
|
|
+ /// Returns the reduction variables found in the loop.
|
|
|
+ ReductionList *getReductionVars() { return &Reductions; }
|
|
|
|
|
|
- /// Hint - associates name and validation with the hint value.
|
|
|
- struct Hint {
|
|
|
- const char * Name;
|
|
|
- unsigned Value; // This may have to change for non-numeric values.
|
|
|
- HintKind Kind;
|
|
|
+ /// Returns the induction variables found in the loop.
|
|
|
+ InductionList *getInductionVars() { return &Inductions; }
|
|
|
|
|
|
- Hint(const char * Name, unsigned Value, HintKind Kind)
|
|
|
- : Name(Name), Value(Value), Kind(Kind) { }
|
|
|
+ /// Returns the widest induction type.
|
|
|
+ Type *getWidestInductionType() { return WidestIndTy; }
|
|
|
|
|
|
- bool validate(unsigned Val) {
|
|
|
- switch (Kind) {
|
|
|
- case HK_WIDTH:
|
|
|
- return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
|
|
|
- case HK_UNROLL:
|
|
|
- return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
|
|
|
- case HK_FORCE:
|
|
|
- return (Val <= 1);
|
|
|
- }
|
|
|
- return false;
|
|
|
- }
|
|
|
- };
|
|
|
+ /// Returns True if V is an induction variable in this loop.
|
|
|
+ bool isInductionVariable(const Value *V);
|
|
|
|
|
|
- /// Vectorization width.
|
|
|
- Hint Width;
|
|
|
- /// Vectorization interleave factor.
|
|
|
- Hint Interleave;
|
|
|
- /// Vectorization forced
|
|
|
- Hint Force;
|
|
|
+ /// Return true if the block BB needs to be predicated in order for the loop
|
|
|
+ /// to be vectorized.
|
|
|
+ bool blockNeedsPredication(BasicBlock *BB);
|
|
|
|
|
|
- /// Return the loop metadata prefix.
|
|
|
- static StringRef Prefix() { return "llvm.loop."; }
|
|
|
+ /// Check if this pointer is consecutive when vectorizing. This happens
|
|
|
+ /// when the last index of the GEP is the induction variable, or that the
|
|
|
+ /// pointer itself is an induction variable.
|
|
|
+ /// This check allows us to vectorize A[idx] into a wide load/store.
|
|
|
+ /// Returns:
|
|
|
+ /// 0 - Stride is unknown or non-consecutive.
|
|
|
+ /// 1 - Address is consecutive.
|
|
|
+ /// -1 - Address is consecutive, and decreasing.
|
|
|
+ int isConsecutivePtr(Value *Ptr);
|
|
|
|
|
|
-public:
|
|
|
- enum ForceKind {
|
|
|
- FK_Undefined = -1, ///< Not selected.
|
|
|
- FK_Disabled = 0, ///< Forcing disabled.
|
|
|
- FK_Enabled = 1, ///< Forcing enabled.
|
|
|
- };
|
|
|
+ /// Returns true if the value V is uniform within the loop.
|
|
|
+ bool isUniform(Value *V);
|
|
|
|
|
|
- LoopVectorizeHints(const Loop *L, bool DisableInterleaving)
|
|
|
- : Width("vectorize.width", VectorizerParams::VectorizationFactor,
|
|
|
- HK_WIDTH),
|
|
|
- Interleave("interleave.count", DisableInterleaving, HK_UNROLL),
|
|
|
- Force("vectorize.enable", FK_Undefined, HK_FORCE),
|
|
|
- TheLoop(L) {
|
|
|
- // Populate values with existing loop metadata.
|
|
|
- getHintsFromMetadata();
|
|
|
+ /// Returns true if this instruction will remain scalar after vectorization.
|
|
|
+ bool isUniformAfterVectorization(Instruction* I) { return Uniforms.count(I); }
|
|
|
|
|
|
- // force-vector-interleave overrides DisableInterleaving.
|
|
|
- if (VectorizerParams::isInterleaveForced())
|
|
|
- Interleave.Value = VectorizerParams::VectorizationInterleave;
|
|
|
+ /// Returns the information that we collected about runtime memory check.
|
|
|
+ const RuntimePointerChecking *getRuntimePointerChecking() const {
|
|
|
+ return LAI->getRuntimePointerChecking();
|
|
|
+ }
|
|
|
|
|
|
- DEBUG(if (DisableInterleaving && Interleave.Value == 1) dbgs()
|
|
|
- << "LV: Interleaving disabled by the pass manager\n");
|
|
|
+ const LoopAccessInfo *getLAI() const {
|
|
|
+ return LAI;
|
|
|
}
|
|
|
|
|
|
- /// Mark the loop L as already vectorized by setting the width to 1.
|
|
|
- void setAlreadyVectorized() {
|
|
|
- Width.Value = Interleave.Value = 1;
|
|
|
- Hint Hints[] = {Width, Interleave};
|
|
|
- writeHintsToMetadata(Hints);
|
|
|
+ /// \brief Check if \p Instr belongs to any interleaved access group.
|
|
|
+ bool isAccessInterleaved(Instruction *Instr) {
|
|
|
+ return InterleaveInfo.isInterleaved(Instr);
|
|
|
}
|
|
|
|
|
|
- bool allowVectorization(Function *F, Loop *L, bool AlwaysVectorize) const {
|
|
|
- if (getForce() == LoopVectorizeHints::FK_Disabled) {
|
|
|
- DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
|
|
|
- emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F,
|
|
|
- L->getStartLoc(), emitRemark());
|
|
|
- return false;
|
|
|
- }
|
|
|
+ /// \brief Get the interleaved access group that \p Instr belongs to.
|
|
|
+ const InterleaveGroup *getInterleavedAccessGroup(Instruction *Instr) {
|
|
|
+ return InterleaveInfo.getInterleaveGroup(Instr);
|
|
|
+ }
|
|
|
|
|
|
- if (!AlwaysVectorize && getForce() != LoopVectorizeHints::FK_Enabled) {
|
|
|
- DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
|
|
|
- emitOptimizationRemarkAnalysis(F->getContext(), DEBUG_TYPE, *F,
|
|
|
- L->getStartLoc(), emitRemark());
|
|
|
- return false;
|
|
|
- }
|
|
|
+ unsigned getMaxSafeDepDistBytes() { return LAI->getMaxSafeDepDistBytes(); }
|
|
|
|
|
|
- if (getWidth() == 1 && getInterleave() == 1) {
|
|
|
- // FIXME: Add a separate metadata to indicate when the loop has already
|
|
|
- // been vectorized instead of setting width and count to 1.
|
|
|
- DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
|
|
|
- // FIXME: Add interleave.disable metadata. This will allow
|
|
|
- // vectorize.disable to be used without disabling the pass and errors
|
|
|
- // to differentiate between disabled vectorization and a width of 1.
|
|
|
- emitOptimizationRemarkAnalysis(
|
|
|
- F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(),
|
|
|
- "loop not vectorized: vectorization and interleaving are explicitly "
|
|
|
- "disabled, or vectorize width and interleave count are both set to "
|
|
|
- "1");
|
|
|
- return false;
|
|
|
- }
|
|
|
+ bool hasStride(Value *V) { return StrideSet.count(V); }
|
|
|
+ bool mustCheckStrides() { return !StrideSet.empty(); }
|
|
|
+ SmallPtrSet<Value *, 8>::iterator strides_begin() {
|
|
|
+ return StrideSet.begin();
|
|
|
+ }
|
|
|
+ SmallPtrSet<Value *, 8>::iterator strides_end() { return StrideSet.end(); }
|
|
|
|
|
|
- return true;
|
|
|
+ /// Returns true if the target machine supports masked store operation
|
|
|
+ /// for the given \p DataType and kind of access to \p Ptr.
|
|
|
+ bool isLegalMaskedStore(Type *DataType, Value *Ptr) {
|
|
|
+ return TTI->isLegalMaskedStore(DataType, isConsecutivePtr(Ptr));
|
|
|
+ }
|
|
|
+ /// Returns true if the target machine supports masked load operation
|
|
|
+ /// for the given \p DataType and kind of access to \p Ptr.
|
|
|
+ bool isLegalMaskedLoad(Type *DataType, Value *Ptr) {
|
|
|
+ return TTI->isLegalMaskedLoad(DataType, isConsecutivePtr(Ptr));
|
|
|
+ }
|
|
|
+ /// Returns true if vector representation of the instruction \p I
|
|
|
+ /// requires mask.
|
|
|
+ bool isMaskRequired(const Instruction* I) {
|
|
|
+ return (MaskedOp.count(I) != 0);
|
|
|
}
|
|
|
+ unsigned getNumStores() const {
|
|
|
+ return LAI->getNumStores();
|
|
|
+ }
|
|
|
+ unsigned getNumLoads() const {
|
|
|
+ return LAI->getNumLoads();
|
|
|
+ }
|
|
|
+ unsigned getNumPredStores() const {
|
|
|
+ return NumPredStores;
|
|
|
+ }
|
|
|
+private:
|
|
|
+ /// Check if a single basic block loop is vectorizable.
|
|
|
+ /// At this point we know that this is a loop with a constant trip count
|
|
|
+ /// and we only need to check individual instructions.
|
|
|
+ bool canVectorizeInstrs();
|
|
|
|
|
|
- /// Dumps all the hint information.
|
|
|
- std::string emitRemark() const {
|
|
|
- VectorizationReport R;
|
|
|
- if (Force.Value == LoopVectorizeHints::FK_Disabled)
|
|
|
- R << "vectorization is explicitly disabled";
|
|
|
- else {
|
|
|
- R << "use -Rpass-analysis=loop-vectorize for more info";
|
|
|
- if (Force.Value == LoopVectorizeHints::FK_Enabled) {
|
|
|
- R << " (Force=true";
|
|
|
- if (Width.Value != 0)
|
|
|
- R << ", Vector Width=" << Width.Value;
|
|
|
- if (Interleave.Value != 0)
|
|
|
- R << ", Interleave Count=" << Interleave.Value;
|
|
|
- R << ")";
|
|
|
- }
|
|
|
- }
|
|
|
+ /// When we vectorize loops we may change the order in which
|
|
|
+ /// we read and write from memory. This method checks if it is
|
|
|
+ /// legal to vectorize the code, considering only memory constraints.
|
|
|
+ /// Returns true if the loop is vectorizable
|
|
|
+ bool canVectorizeMemory();
|
|
|
|
|
|
- return R.str();
|
|
|
+ /// Return true if we can vectorize this loop using the IF-conversion
|
|
|
+ /// transformation.
|
|
|
+ bool canVectorizeWithIfConvert();
|
|
|
+
|
|
|
+ /// Collect the variables that need to stay uniform after vectorization.
|
|
|
+ void collectLoopUniforms();
|
|
|
+
|
|
|
+ /// Return true if all of the instructions in the block can be speculatively
|
|
|
+ /// executed. \p SafePtrs is a list of addresses that are known to be legal
|
|
|
+ /// and we know that we can read from them without segfault.
|
|
|
+ bool blockCanBePredicated(BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs);
|
|
|
+
|
|
|
+ /// Returns the induction kind of Phi and records the step. This function may
|
|
|
+ /// return NoInduction if the PHI is not an induction variable.
|
|
|
+ InductionKind isInductionVariable(PHINode *Phi, ConstantInt *&StepValue);
|
|
|
+
|
|
|
+ /// \brief Collect memory access with loop invariant strides.
|
|
|
+ ///
|
|
|
+ /// Looks for accesses like "a[i * StrideA]" where "StrideA" is loop
|
|
|
+ /// invariant.
|
|
|
+ void collectStridedAccess(Value *LoadOrStoreInst);
|
|
|
+
|
|
|
+ /// Report an analysis message to assist the user in diagnosing loops that are
|
|
|
+ /// not vectorized. These are handled as LoopAccessReport rather than
|
|
|
+ /// VectorizationReport because the << operator of VectorizationReport returns
|
|
|
+ /// LoopAccessReport.
|
|
|
+ void emitAnalysis(const LoopAccessReport &Message) {
|
|
|
+ LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME);
|
|
|
}
|
|
|
|
|
|
- unsigned getWidth() const { return Width.Value; }
|
|
|
- unsigned getInterleave() const { return Interleave.Value; }
|
|
|
- enum ForceKind getForce() const { return (ForceKind)Force.Value; }
|
|
|
+ unsigned NumPredStores;
|
|
|
|
|
|
-private:
|
|
|
- /// Find hints specified in the loop metadata and update local values.
|
|
|
- void getHintsFromMetadata() {
|
|
|
- MDNode *LoopID = TheLoop->getLoopID();
|
|
|
- if (!LoopID)
|
|
|
- return;
|
|
|
+ /// The loop that we evaluate.
|
|
|
+ Loop *TheLoop;
|
|
|
+ /// Scev analysis.
|
|
|
+ ScalarEvolution *SE;
|
|
|
+ /// Target Library Info.
|
|
|
+ TargetLibraryInfo *TLI;
|
|
|
+ /// Parent function
|
|
|
+ Function *TheFunction;
|
|
|
+ /// Target Transform Info
|
|
|
+ const TargetTransformInfo *TTI;
|
|
|
+ /// Dominator Tree.
|
|
|
+ DominatorTree *DT;
|
|
|
+ // LoopAccess analysis.
|
|
|
+ LoopAccessAnalysis *LAA;
|
|
|
+ // And the loop-accesses info corresponding to this loop. This pointer is
|
|
|
+ // null until canVectorizeMemory sets it up.
|
|
|
+ const LoopAccessInfo *LAI;
|
|
|
|
|
|
- // First operand should refer to the loop id itself.
|
|
|
- assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
|
|
|
- assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
|
|
|
+ /// The interleave access information contains groups of interleaved accesses
|
|
|
+ /// with the same stride and close to each other.
|
|
|
+ InterleavedAccessInfo InterleaveInfo;
|
|
|
|
|
|
- for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
|
|
|
- const MDString *S = nullptr;
|
|
|
- SmallVector<Metadata *, 4> Args;
|
|
|
+ // --- vectorization state --- //
|
|
|
|
|
|
- // The expected hint is either a MDString or a MDNode with the first
|
|
|
- // operand a MDString.
|
|
|
- if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
|
|
|
- if (!MD || MD->getNumOperands() == 0)
|
|
|
- continue;
|
|
|
- S = dyn_cast<MDString>(MD->getOperand(0));
|
|
|
- for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
|
|
|
- Args.push_back(MD->getOperand(i));
|
|
|
- } else {
|
|
|
- S = dyn_cast<MDString>(LoopID->getOperand(i));
|
|
|
- assert(Args.size() == 0 && "too many arguments for MDString");
|
|
|
- }
|
|
|
+ /// Holds the integer induction variable. This is the counter of the
|
|
|
+ /// loop.
|
|
|
+ PHINode *Induction;
|
|
|
+ /// Holds the reduction variables.
|
|
|
+ ReductionList Reductions;
|
|
|
+ /// Holds all of the induction variables that we found in the loop.
|
|
|
+ /// Notice that inductions don't need to start at zero and that induction
|
|
|
+ /// variables can be pointers.
|
|
|
+ InductionList Inductions;
|
|
|
+ /// Holds the widest induction type encountered.
|
|
|
+ Type *WidestIndTy;
|
|
|
|
|
|
- if (!S)
|
|
|
- continue;
|
|
|
+ /// Allowed outside users. This holds the reduction
|
|
|
+ /// vars which can be accessed from outside the loop.
|
|
|
+ SmallPtrSet<Value*, 4> AllowedExit;
|
|
|
+ /// This set holds the variables which are known to be uniform after
|
|
|
+ /// vectorization.
|
|
|
+ SmallPtrSet<Instruction*, 4> Uniforms;
|
|
|
|
|
|
- // Check if the hint starts with the loop metadata prefix.
|
|
|
- StringRef Name = S->getString();
|
|
|
- if (Args.size() == 1)
|
|
|
- setHint(Name, Args[0]);
|
|
|
- }
|
|
|
- }
|
|
|
+ /// Can we assume the absence of NaNs.
|
|
|
+ bool HasFunNoNaNAttr;
|
|
|
|
|
|
- /// Checks string hint with one operand and set value if valid.
|
|
|
- void setHint(StringRef Name, Metadata *Arg) {
|
|
|
- if (!Name.startswith(Prefix()))
|
|
|
- return;
|
|
|
- Name = Name.substr(Prefix().size(), StringRef::npos);
|
|
|
+ /// Vectorization requirements that will go through late-evaluation.
|
|
|
+ LoopVectorizationRequirements *Requirements;
|
|
|
|
|
|
- const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
|
|
|
- if (!C) return;
|
|
|
- unsigned Val = C->getZExtValue();
|
|
|
+ ValueToValueMap Strides;
|
|
|
+ SmallPtrSet<Value *, 8> StrideSet;
|
|
|
|
|
|
- Hint *Hints[] = {&Width, &Interleave, &Force};
|
|
|
- for (auto H : Hints) {
|
|
|
- if (Name == H->Name) {
|
|
|
- if (H->validate(Val))
|
|
|
- H->Value = Val;
|
|
|
- else
|
|
|
- DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
|
|
|
- break;
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
+ /// While vectorizing these instructions we have to generate a
|
|
|
+ /// call to the appropriate masked intrinsic
|
|
|
+ SmallPtrSet<const Instruction*, 8> MaskedOp;
|
|
|
+};
|
|
|
|
|
|
- /// Create a new hint from name / value pair.
|
|
|
- MDNode *createHintMetadata(StringRef Name, unsigned V) const {
|
|
|
- LLVMContext &Context = TheLoop->getHeader()->getContext();
|
|
|
- Metadata *MDs[] = {MDString::get(Context, Name),
|
|
|
- ConstantAsMetadata::get(
|
|
|
- ConstantInt::get(Type::getInt32Ty(Context), V))};
|
|
|
- return MDNode::get(Context, MDs);
|
|
|
+/// LoopVectorizationCostModel - estimates the expected speedups due to
|
|
|
+/// vectorization.
|
|
|
+/// In many cases vectorization is not profitable. This can happen because of
|
|
|
+/// a number of reasons. In this class we mainly attempt to predict the
|
|
|
+/// expected speedup/slowdowns due to the supported instruction set. We use the
|
|
|
+/// TargetTransformInfo to query the different backends for the cost of
|
|
|
+/// different operations.
|
|
|
+class LoopVectorizationCostModel {
|
|
|
+public:
|
|
|
+ LoopVectorizationCostModel(Loop *L, ScalarEvolution *SE, LoopInfo *LI,
|
|
|
+ LoopVectorizationLegality *Legal,
|
|
|
+ const TargetTransformInfo &TTI,
|
|
|
+ const TargetLibraryInfo *TLI, AssumptionCache *AC,
|
|
|
+ const Function *F, const LoopVectorizeHints *Hints)
|
|
|
+ : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI),
|
|
|
+ TheFunction(F), Hints(Hints) {
|
|
|
+ CodeMetrics::collectEphemeralValues(L, AC, EphValues);
|
|
|
}
|
|
|
|
|
|
- /// Matches metadata with hint name.
|
|
|
- bool matchesHintMetadataName(MDNode *Node, ArrayRef<Hint> HintTypes) {
|
|
|
- MDString* Name = dyn_cast<MDString>(Node->getOperand(0));
|
|
|
- if (!Name)
|
|
|
- return false;
|
|
|
+ /// Information about vectorization costs
|
|
|
+ struct VectorizationFactor {
|
|
|
+ unsigned Width; // Vector width with best cost
|
|
|
+ unsigned Cost; // Cost of the loop with that width
|
|
|
+ };
|
|
|
+ /// \return The most profitable vectorization factor and the cost of that VF.
|
|
|
+ /// This method checks every power of two up to VF. If UserVF is not ZERO
|
|
|
+ /// then this vectorization factor will be selected if vectorization is
|
|
|
+ /// possible.
|
|
|
+ VectorizationFactor selectVectorizationFactor(bool OptForSize);
|
|
|
|
|
|
- for (auto H : HintTypes)
|
|
|
- if (Name->getString().endswith(H.Name))
|
|
|
- return true;
|
|
|
- return false;
|
|
|
- }
|
|
|
+ /// \return The size (in bits) of the widest type in the code that
|
|
|
+ /// needs to be vectorized. We ignore values that remain scalar such as
|
|
|
+ /// 64 bit loop indices.
|
|
|
+ unsigned getWidestType();
|
|
|
|
|
|
- /// Sets current hints into loop metadata, keeping other values intact.
|
|
|
- void writeHintsToMetadata(ArrayRef<Hint> HintTypes) {
|
|
|
- if (HintTypes.size() == 0)
|
|
|
- return;
|
|
|
+ /// \return The desired interleave count.
|
|
|
+ /// If interleave count has been specified by metadata it will be returned.
|
|
|
+ /// Otherwise, the interleave count is computed and returned. VF and LoopCost
|
|
|
+ /// are the selected vectorization factor and the cost of the selected VF.
|
|
|
+ unsigned selectInterleaveCount(bool OptForSize, unsigned VF,
|
|
|
+ unsigned LoopCost);
|
|
|
|
|
|
- // Reserve the first element to LoopID (see below).
|
|
|
- SmallVector<Metadata *, 4> MDs(1);
|
|
|
- // If the loop already has metadata, then ignore the existing operands.
|
|
|
- MDNode *LoopID = TheLoop->getLoopID();
|
|
|
- if (LoopID) {
|
|
|
- for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
|
|
|
- MDNode *Node = cast<MDNode>(LoopID->getOperand(i));
|
|
|
- // If node in update list, ignore old value.
|
|
|
- if (!matchesHintMetadataName(Node, HintTypes))
|
|
|
- MDs.push_back(Node);
|
|
|
- }
|
|
|
- }
|
|
|
+ /// \return The most profitable unroll factor.
|
|
|
+ /// This method finds the best unroll-factor based on register pressure and
|
|
|
+ /// other parameters. VF and LoopCost are the selected vectorization factor
|
|
|
+ /// and the cost of the selected VF.
|
|
|
+ unsigned computeInterleaveCount(bool OptForSize, unsigned VF,
|
|
|
+ unsigned LoopCost);
|
|
|
|
|
|
- // Now, add the missing hints.
|
|
|
- for (auto H : HintTypes)
|
|
|
- MDs.push_back(createHintMetadata(Twine(Prefix(), H.Name).str(), H.Value));
|
|
|
+ /// \brief A struct that represents some properties of the register usage
|
|
|
+ /// of a loop.
|
|
|
+ struct RegisterUsage {
|
|
|
+ /// Holds the number of loop invariant values that are used in the loop.
|
|
|
+ unsigned LoopInvariantRegs;
|
|
|
+ /// Holds the maximum number of concurrent live intervals in the loop.
|
|
|
+ unsigned MaxLocalUsers;
|
|
|
+ /// Holds the number of instructions in the loop.
|
|
|
+ unsigned NumInstructions;
|
|
|
+ };
|
|
|
|
|
|
- // Replace current metadata node with new one.
|
|
|
- LLVMContext &Context = TheLoop->getHeader()->getContext();
|
|
|
- MDNode *NewLoopID = MDNode::get(Context, MDs);
|
|
|
- // Set operand 0 to refer to the loop id itself.
|
|
|
- NewLoopID->replaceOperandWith(0, NewLoopID);
|
|
|
+ /// \return information about the register usage of the loop.
|
|
|
+ RegisterUsage calculateRegisterUsage();
|
|
|
|
|
|
- TheLoop->setLoopID(NewLoopID);
|
|
|
- }
|
|
|
+private:
|
|
|
+ /// Returns the expected execution cost. The unit of the cost does
|
|
|
+ /// not matter because we use the 'cost' units to compare different
|
|
|
+ /// vector widths. The cost that is returned is *not* normalized by
|
|
|
+ /// the factor width.
|
|
|
+ unsigned expectedCost(unsigned VF);
|
|
|
|
|
|
- /// The loop these hints belong to.
|
|
|
- const Loop *TheLoop;
|
|
|
-};
|
|
|
+ /// Returns the execution time cost of an instruction for a given vector
|
|
|
+ /// width. Vector width of one means scalar.
|
|
|
+ unsigned getInstructionCost(Instruction *I, unsigned VF);
|
|
|
|
|
|
-static void emitMissedWarning(Function *F, Loop *L,
|
|
|
- const LoopVectorizeHints &LH) {
|
|
|
- emitOptimizationRemarkMissed(F->getContext(), DEBUG_TYPE, *F,
|
|
|
- L->getStartLoc(), LH.emitRemark());
|
|
|
+ /// Returns whether the instruction is a load or store and will be emitted
|
|
|
+ /// as a vector operation.
|
|
|
+ bool isConsecutiveLoadOrStore(Instruction *I);
|
|
|
|
|
|
- if (LH.getForce() == LoopVectorizeHints::FK_Enabled) {
|
|
|
- if (LH.getWidth() != 1)
|
|
|
- emitLoopVectorizeWarning(
|
|
|
- F->getContext(), *F, L->getStartLoc(),
|
|
|
- "failed explicitly specified loop vectorization");
|
|
|
- else if (LH.getInterleave() != 1)
|
|
|
- emitLoopInterleaveWarning(
|
|
|
- F->getContext(), *F, L->getStartLoc(),
|
|
|
- "failed explicitly specified loop interleaving");
|
|
|
+ /// Report an analysis message to assist the user in diagnosing loops that are
|
|
|
+ /// not vectorized. These are handled as LoopAccessReport rather than
|
|
|
+ /// VectorizationReport because the << operator of VectorizationReport returns
|
|
|
+ /// LoopAccessReport.
|
|
|
+ void emitAnalysis(const LoopAccessReport &Message) {
|
|
|
+ LoopAccessReport::emitAnalysis(Message, TheFunction, TheLoop, LV_NAME);
|
|
|
}
|
|
|
-}
|
|
|
+
|
|
|
+ /// Values used only by @llvm.assume calls.
|
|
|
+ SmallPtrSet<const Value *, 32> EphValues;
|
|
|
+
|
|
|
+ /// The loop that we evaluate.
|
|
|
+ Loop *TheLoop;
|
|
|
+ /// Scev analysis.
|
|
|
+ ScalarEvolution *SE;
|
|
|
+ /// Loop Info analysis.
|
|
|
+ LoopInfo *LI;
|
|
|
+ /// Vectorization legality.
|
|
|
+ LoopVectorizationLegality *Legal;
|
|
|
+ /// Vector target information.
|
|
|
+ const TargetTransformInfo &TTI;
|
|
|
+ /// Target Library Info.
|
|
|
+ const TargetLibraryInfo *TLI;
|
|
|
+ const Function *TheFunction;
|
|
|
+ // Loop Vectorize Hint.
|
|
|
+ const LoopVectorizeHints *Hints;
|
|
|
+};
|
|
|
|
|
|
/// \brief This holds vectorization requirements that must be verified late in
|
|
|
/// the process. The requirements are set by legalize and costmodel. Once
|