12 years ago · 83be7b0dd3
--- a/include/llvm/Analysis/TargetTransformInfo.h
+++ b/include/llvm/Analysis/TargetTransformInfo.h
@@ -148,6 +148,11 @@ public:
 
				   /// set to false, it returns the number of scalar registers.
			
 
				   virtual unsigned getNumberOfRegisters(bool Vector) const;
			
 
				 
			
 
				+  /// \return The maximum unroll factor that the vectorizer should try to
			
 
				+  /// perform for this target. This number depends on the level of parallelism
			
 
				+  /// and the number of execution units in the CPU.
			
 
				+  virtual unsigned getMaximumUnrollFactor() const;
			
 
				+
			
 
				   /// \return The expected cost of arithmetic ops, such as mul, xor, fsub, etc.
			
 
				   virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
			
 
				 
			
--- a/lib/Analysis/TargetTransformInfo.cpp
+++ b/lib/Analysis/TargetTransformInfo.cpp
@@ -92,6 +92,10 @@ unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
 
				   return PrevTTI->getNumberOfRegisters(Vector);
			
 
				 }
			
 
				 
			
 
				+unsigned TargetTransformInfo::getMaximumUnrollFactor() const {
			
 
				+  return PrevTTI->getMaximumUnrollFactor();
			
 
				+}
			
 
				+
			
 
				 unsigned TargetTransformInfo::getArithmeticInstrCost(unsigned Opcode,
			
 
				                                                      Type *Ty) const {
			
 
				   return PrevTTI->getArithmeticInstrCost(Opcode, Ty);
			
@@ -216,6 +220,10 @@ struct NoTTI : ImmutablePass, TargetTransformInfo {
 
				     return 8;
			
 
				   }
			
 
				 
			
 
				+  unsigned getMaximumUnrollFactor() const {
			
 
				+    return 1;
			
 
				+  }
			
 
				+
			
 
				   unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
			
 
				     return 1;
			
 
				   }
			
--- a/lib/CodeGen/BasicTargetTransformInfo.cpp
+++ b/lib/CodeGen/BasicTargetTransformInfo.cpp
@@ -83,6 +83,7 @@ public:
 
				   /// @{
			
 
				 
			
 
				   virtual unsigned getNumberOfRegisters(bool Vector) const;
			
 
				+  virtual unsigned getMaximumUnrollFactor() const;
			
 
				   virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
			
 
				   virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
			
 
				                                   int Index, Type *SubTp) const;
			
@@ -182,6 +183,10 @@ unsigned BasicTTI::getNumberOfRegisters(bool Vector) const {
 
				   return 1;
			
 
				 }
			
 
				 
			
 
				+unsigned BasicTTI::getMaximumUnrollFactor() const {
			
 
				+  return 1;
			
 
				+}
			
 
				+
			
 
				 unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
			
 
				   // Check if any of the operands are vector operands.
			
 
				   int ISD = TLI->InstructionOpcodeToISD(Opcode);
			
--- a/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -77,6 +77,31 @@ public:
 
				   virtual unsigned getIntImmCost(const APInt &Imm, Type *Ty) const;
			
 
				 
			
 
				   /// @}
			
 
				+
			
 
				+
			
 
				+  /// \name Vector TTI Implementations
			
 
				+  /// @{
			
 
				+
			
 
				+  unsigned getNumberOfRegisters(bool Vector) const {
			
 
				+    if (Vector) {
			
 
				+      if (ST->hasNEON())
			
 
				+        return 16;
			
 
				+      return 0;
			
 
				+    }
			
 
				+
			
 
				+    if (ST->isThumb1Only())
			
 
				+      return 8;
			
 
				+    return 16;
			
 
				+  }
			
 
				+
			
 
				+  unsigned getMaximumUnrollFactor() const {
			
 
				+    // These are out of order CPUs:
			
 
				+    if (ST->isCortexA15() || ST->isSwift())
			
 
				+      return 2;
			
 
				+    return 1;
			
 
				+  }
			
 
				+
			
 
				+  /// @}
			
 
				 };
			
 
				 
			
 
				 } // end anonymous namespace
			
--- a/lib/Target/X86/X86TargetTransformInfo.cpp
+++ b/lib/Target/X86/X86TargetTransformInfo.cpp
@@ -75,7 +75,6 @@ public:
 
				 
			
 
				   /// \name Scalar TTI Implementations
			
 
				   /// @{
			
 
				-
			
 
				   virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const;
			
 
				 
			
 
				   /// @}
			
@@ -84,6 +83,7 @@ public:
 
				   /// @{
			
 
				 
			
 
				   virtual unsigned getNumberOfRegisters(bool Vector) const;
			
 
				+  virtual unsigned getMaximumUnrollFactor() const;
			
 
				   virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const;
			
 
				   virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
			
 
				                                   int Index, Type *SubTp) const;
			
@@ -156,7 +156,6 @@ FindInConvertTable(const X86TypeConversionCostTblEntry *Tbl, unsigned len,
 
				   return -1;
			
 
				 }
			
 
				 
			
 
				-
			
 
				 X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const {
			
 
				   assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
			
 
				   // TODO: Currently the __builtin_popcount() implementation using SSE3
			
@@ -171,6 +170,18 @@ unsigned X86TTI::getNumberOfRegisters(bool Vector) const {
 
				   return 8;
			
 
				 }
			
 
				 
			
 
				+unsigned X86TTI::getMaximumUnrollFactor() const {
			
 
				+  if (ST->isAtom())
			
 
				+    return 1;
			
 
				+
			
 
				+  // Sandybridge and Haswell have multiple execution ports and pipelined
			
 
				+  // vector units.
			
 
				+  if (ST->hasAVX())
			
 
				+    return 4;
			
 
				+
			
 
				+  return 2;
			
 
				+}
			
 
				+
			
 
				 unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const {
			
 
				   // Legalize the type.
			
 
				   std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);
			
--- a/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -116,9 +116,6 @@ static const unsigned RuntimeMemoryCheckThreshold = 4;
 
				 /// This is the highest vector width that we try to generate.
			
 
				 static const unsigned MaxVectorSize = 8;
			
 
				 
			
 
				-/// This is the highest Unroll Factor.
			
 
				-static const unsigned MaxUnrollSize = 4;
			
 
				-
			
 
				 namespace {
			
 
				 
			
 
				 // Forward declarations.
			
@@ -2715,6 +2712,8 @@ LoopVectorizationCostModel::selectUnrollFactor(bool OptForSize,
 
				   UF = std::min(UF, (MaxLoopSizeThreshold / R.NumInstructions));
			
 
				 
			
 
				   // Clamp the unroll factor ranges to reasonable factors.
			
 
				+  unsigned MaxUnrollSize = TTI.getMaximumUnrollFactor();
			
 
				+  
			
 
				   if (UF > MaxUnrollSize)
			
 
				     UF = MaxUnrollSize;
			
 
				   else if (UF < 1)
			
--- a/test/Transforms/LoopVectorize/ARM/lit.local.cfg
+++ b/test/Transforms/LoopVectorize/ARM/lit.local.cfg
@@ -0,0 +1,6 @@
 
				+config.suffixes = ['.ll', '.c', '.cpp']
			
 
				+
			
 
				+targets = set(config.root.targets_to_build.split())
			
 
				+if not 'ARM' in targets:
			
 
				+    config.unsupported = True
			
 
				+
			
--- a/test/Transforms/LoopVectorize/ARM/sanity.ll
+++ b/test/Transforms/LoopVectorize/ARM/sanity.ll
@@ -0,0 +1,25 @@
 
				+; RUN: opt < %s  -loop-vectorize -mtriple=thumbv7-apple-ios3.0.0 -S
			
 
				+
			
 
				+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
			
 
				+target triple = "thumbv7-apple-ios3.0.0"
			
 
				+
			
 
				+; Make sure that we are not crashing on ARM.
			
 
				+
			
 
				+define i32 @foo(i32* nocapture %A, i32 %n) nounwind readonly ssp {
			
 
				+  %1 = icmp sgt i32 %n, 0
			
 
				+  br i1 %1, label %.lr.ph, label %._crit_edge
			
 
				+
			
 
				+.lr.ph:                                           ; preds = %0, %.lr.ph
			
 
				+  %i.02 = phi i32 [ %5, %.lr.ph ], [ 0, %0 ]
			
 
				+  %sum.01 = phi i32 [ %4, %.lr.ph ], [ 0, %0 ]
			
 
				+  %2 = getelementptr inbounds i32* %A, i32 %i.02
			
 
				+  %3 = load i32* %2, align 4
			
 
				+  %4 = add nsw i32 %3, %sum.01
			
 
				+  %5 = add nsw i32 %i.02, 1
			
 
				+  %exitcond = icmp eq i32 %5, %n
			
 
				+  br i1 %exitcond, label %._crit_edge, label %.lr.ph
			
 
				+
			
 
				+._crit_edge:                                      ; preds = %.lr.ph, %0
			
 
				+  %sum.0.lcssa = phi i32 [ 0, %0 ], [ %4, %.lr.ph ]
			
 
				+  ret i32 %sum.0.lcssa
			
 
				+}
			
--- a/test/Transforms/LoopVectorize/X86/gcc-examples.ll
+++ b/test/Transforms/LoopVectorize/X86/gcc-examples.ll
@@ -53,8 +53,6 @@ define void @example1() nounwind uwtable ssp {
 
				 ;UNROLL: @example10b
			
 
				 ;UNROLL: load <4 x i16>
			
 
				 ;UNROLL: load <4 x i16>
			
 
				-;UNROLL: load <4 x i16>
			
 
				-;UNROLL: store <4 x i32>
			
 
				 ;UNROLL: store <4 x i32>
			
 
				 ;UNROLL: store <4 x i32>
			
 
				 ;UNROLL: ret void