
Fix va_arg for doubles. With this patch VAARG nodes always contain the
correct alignment information, which simplifies ExpandRes_VAARG a bit.

The patch introduces new alignment information in TargetLowering. This is
needed because neither of the two natural candidates can be used:

* The 's' in the target data: if it were set to the minimal alignment of any
argument, getCallFrameTypeAlignment would, for example, return 4 for doubles
on ARM.
* The getTransientStackAlignment method: an architecture can have arguments
that are less aligned than the alignment we maintain for the stack pointer
(see the sketch after this list).
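
A minimal sketch of the expansion this enables, written as plain pointer
arithmetic rather than SelectionDAG nodes (the free-standing helper and its
name are illustrative and not part of the patch):

    #include <cassert>
    #include <cstdint>

    // Round the va_list pointer up to Align, but only when the requested
    // alignment exceeds what the target already guarantees for every stack
    // argument (the new getMinStackArgumentAlignment hook).
    static std::uint64_t adjustVAListForAlignment(std::uint64_t VAList,
                                                  std::uint64_t Align,
                                                  std::uint64_t MinStackArgAlign) {
      if (Align > MinStackArgAlign) {
        assert((Align & (Align - 1)) == 0 && "Align must be a power of 2");
        VAList = (VAList + Align - 1) & ~(Align - 1);
      }
      return VAList;
    }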

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@108072 91177308-0d34-0410-b5e6-96231b3b80d8

Rafael Espindola 15 years ago
Parent
Current commit
cbeeae23c3

+ 1 - 1
include/llvm/CodeGen/SelectionDAG.h

@@ -582,7 +582,7 @@ public:
   /// getVAArg - VAArg produces a result and token chain, and takes a pointer
   /// and a source value as input.
   SDValue getVAArg(EVT VT, DebugLoc dl, SDValue Chain, SDValue Ptr,
-                   SDValue SV, unsigned Align = 0);
+                   SDValue SV, unsigned Align);
 
   /// getAtomic - Gets a node for an atomic op, produces result and chain and
   /// takes 3 operands

+ 17 - 0
include/llvm/Target/TargetLowering.h

@@ -686,6 +686,12 @@ public:
     return JumpBufAlignment;
   }
 
+  /// getMinStackArgumentAlignment - return the minimum stack alignment of an
+  /// argument.
+  unsigned getMinStackArgumentAlignment() const {
+    return MinStackArgumentAlignment;
+  }
+
   /// getPrefLoopAlignment - return the preferred loop alignment.
   ///
   unsigned getPrefLoopAlignment() const {
@@ -1082,6 +1088,12 @@ protected:
     PrefLoopAlignment = Align;
   }
 
+  /// setMinStackArgumentAlignment - Set the minimum stack alignment of an
+  /// argument.
+  void setMinStackArgumentAlignment(unsigned Align) {
+    MinStackArgumentAlignment = Align;
+  }
+
   /// setShouldFoldAtomicFences - Set if the target's implementation of the
   /// atomic operation intrinsics includes locking. Default is false.
   void setShouldFoldAtomicFences(bool fold) {
@@ -1515,6 +1527,11 @@ private:
   /// buffers
   unsigned JumpBufAlignment;
 
+  /// MinStackArgumentAlignment - The minimum alignment that any argument
+  /// on the stack needs to have.
+  ///
+  unsigned MinStackArgumentAlignment;
+
   /// PrefLoopAlignment - The perferred loop alignment.
   ///
   unsigned PrefLoopAlignment;

+ 3 - 1
lib/CodeGen/SelectionDAG/LegalizeDAG.cpp

@@ -2658,7 +2658,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
                                      false, false, 0);
     SDValue VAList = VAListLoad;
 
-    if (Align != 0 ) {
+    if (Align > TLI.getMinStackArgumentAlignment()) {
+      assert(((Align & (Align-1)) == 0) && "Expected Align to be a power of 2");
+
       VAList = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
                            DAG.getConstant(Align - 1,
                                            TLI.getPointerTy()));

+ 2 - 1
lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp

@@ -504,7 +504,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) {
   DebugLoc dl = N->getDebugLoc();
 
   SDValue NewVAARG;
-  NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2));
+  NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2),
+                          N->getConstantOperandVal(3));
 
   // Legalized the chain result - switch anything that used the old chain to
   // use the new one.

+ 2 - 1
lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

@@ -572,7 +572,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) {
 
   SmallVector<SDValue, 8> Parts(NumRegs);
   for (unsigned i = 0; i < NumRegs; ++i) {
-    Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2));
+    Parts[i] = DAG.getVAArg(RegVT, dl, Chain, Ptr, N->getOperand(2),
+                            N->getConstantOperandVal(3));
     Chain = Parts[i].getValue(1);
   }
 

+ 2 - 5
lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp

@@ -243,13 +243,10 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) {
   SDValue Chain = N->getOperand(0);
   SDValue Ptr = N->getOperand(1);
   DebugLoc dl = N->getDebugLoc();
-  const unsigned OldAlign = N->getConstantOperandVal(3);
-  const Type *Type = OVT.getTypeForEVT(*DAG.getContext());
-  const unsigned TypeAlign = TLI.getTargetData()->getABITypeAlignment(Type);
-  const unsigned Align = std::max(OldAlign, TypeAlign);
+  const unsigned Align = N->getConstantOperandVal(3);
 
   Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align);
-  Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2));
+  Hi = DAG.getVAArg(NVT, dl, Lo.getValue(1), Ptr, N->getOperand(2), 0);
 
   // Handle endianness of the load.
   if (TLI.isBigEndian())

+ 2 - 1
lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp

@@ -5672,7 +5672,8 @@ void SelectionDAGBuilder::visitVAStart(const CallInst &I) {
 void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) {
   SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(),
                            getRoot(), getValue(I.getOperand(0)),
-                           DAG.getSrcValue(I.getOperand(0)));
+                           DAG.getSrcValue(I.getOperand(0)),
+                           TLI.getTargetData()->getABITypeAlignment(I.getType()));
   setValue(&I, V);
   DAG.setRoot(V.getValue(1));
 }

+ 1 - 0
lib/CodeGen/SelectionDAG/TargetLowering.cpp

@@ -580,6 +580,7 @@ TargetLowering::TargetLowering(const TargetMachine &tm,
   JumpBufSize = 0;
   JumpBufAlignment = 0;
   PrefLoopAlignment = 0;
+  MinStackArgumentAlignment = 1;
   ShouldFoldAtomicFences = false;
 
   InitLibcallNames(LibcallRoutineNames);

+ 4 - 0
lib/Target/ARM/ARMISelLowering.cpp

@@ -539,6 +539,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
 
   maxStoresPerMemcpy = 1;   //// temporary - rewrite interface to use type
 
+  // On ARM, arguments smaller than 4 bytes are extended, so all arguments
+  // are at least 4-byte aligned.
+  setMinStackArgumentAlignment(4);
+
   if (EnableARMCodePlacement)
     benefitFromCodePlacementOpt = true;
 }
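
As an aside on why 4 is the right minimum here (my illustration, not part of
the patch): C's default argument promotions mean nothing narrower than an int
ever reaches a variadic argument slot, so every variadic stack slot on AAPCS
is a full, 4-byte-aligned word:

    #include <cstdarg>

    // Both a char and a short arrive through "..." already promoted to int,
    // each occupying a full 4-byte slot, so no va_arg read on this ABI ever
    // needs less than 4-byte alignment.
    int sum_two(int count, ...) {
      va_list ap;
      va_start(ap, count);
      int a = va_arg(ap, int); // e.g. a (char)1 argument, read as a promoted int
      int b = va_arg(ap, int); // e.g. a (short)2 argument, read as a promoted int
      va_end(ap);
      return a + b;
    }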

+ 23 - 1
test/CodeGen/ARM/va_arg.ll

@@ -1,10 +1,13 @@
 ; RUN: llc < %s -mtriple=armv7-none-linux-gnueabi | FileCheck %s
 ; Test that we correctly align elements when using va_arg
 
+; CHECK: test1:
+; CHECK-NOT: bfc
 ; CHECK: add	r0, r0, #7
 ; CHECK: bfc	r0, #0, #3
+; CHECK-NOT: bfc
 
-define i64 @f8(i32 %i, ...) nounwind optsize {
+define i64 @test1(i32 %i, ...) nounwind optsize {
 entry:
   %g = alloca i8*, align 4
   %g1 = bitcast i8** %g to i8*
@@ -14,6 +17,25 @@ entry:
   ret i64 %0
 }
 
+; CHECK: test2:
+; CHECK-NOT: bfc
+; CHECK: add	r0, r0, #7
+; CHECK: bfc	r0, #0, #3
+; CHECK-NOT:	bfc
+; CHECK: bx	lr
+
+define double @test2(i32 %a, i32 %b, ...) nounwind optsize {
+entry:
+  %ap = alloca i8*, align 4                       ; <i8**> [#uses=3]
+  %ap1 = bitcast i8** %ap to i8*                  ; <i8*> [#uses=2]
+  call void @llvm.va_start(i8* %ap1)
+  %0 = va_arg i8** %ap, i32                       ; <i32> [#uses=0]
+  %1 = va_arg i8** %ap, double                    ; <double> [#uses=1]
+  call void @llvm.va_end(i8* %ap1)
+  ret double %1
+}
+
+
 declare void @llvm.va_start(i8*) nounwind
 
 declare void @llvm.va_end(i8*) nounwind
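
For reference, a roughly equivalent C/C++ source for @test2 above,
reconstructed from the IR rather than taken from the patch; after the i32 is
consumed the va_list may be only 4-byte aligned, so reading the double must
round up dynamically, which is exactly the add/bfc sequence the CHECK lines
expect:

    #include <cstdarg>

    double test2(int a, int b, ...) {
      va_list ap;
      va_start(ap, b);
      (void)va_arg(ap, int);          // %0 = va_arg i8** %ap, i32
      double d = va_arg(ap, double);  // %1 = va_arg i8** %ap, double
      va_end(ap);
      return d;
    }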