9 years ago · 494843ae42
--- a/lib/CodeGen/CGCUDABuiltin.cpp
+++ b/lib/CodeGen/CGCUDABuiltin.cpp
@@ -52,10 +52,13 @@ static llvm::Function *GetVprintfDeclaration(llvm::Module &M) {
 
															 //
														
 
															 // is converted into something resembling
														
 
															 //
														
 
															-//   char* buf = alloca(...);
														
 
															-//   *reinterpret_cast<Arg1*>(buf) = arg1;
														
 
															-//   *reinterpret_cast<Arg2*>(buf + ...) = arg2;
														
 
															-//   *reinterpret_cast<Arg3*>(buf + ...) = arg3;
														
 
															+//   struct Tmp {
														
 
															+//     Arg1 a1;
														
 
															+//     Arg2 a2;
														
 
															+//     Arg3 a3;
														
 
															+//   };
														
 
															+//   char* buf = alloca(sizeof(Tmp));
														
 
															+//   *(Tmp*)buf = {a1, a2, a3};
														
 
															 //   vprintf("format string", buf);
														
 
															 //
														
 
															 // buf is aligned to the max of {alignof(Arg1), ...}.  Furthermore, each of the
														
@@ -80,48 +83,24 @@ CodeGenFunction::EmitCUDADevicePrintfCallExpr(const CallExpr *E,
 
															                E->arguments(), E->getDirectCallee(),
														
 
															                /* ParamsToSkip = */ 0);
														
 
															-  // Figure out how large of a buffer we need to hold our varargs and how
														
 
															-  // aligned the buffer needs to be.  We start iterating at Arg[1], because
														
 
															-  // that's our first vararg.
														
 
															-  unsigned BufSize = 0;
														
 
															-  unsigned BufAlign = 0;
														
 
															-  for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) {
														
 
															-    const RValue& RV = Args[I].RV;
														
 
															-    llvm::Type* Ty = RV.getScalarVal()->getType();
														
 
															-
														
 
															-    auto Align = DL.getPrefTypeAlignment(Ty);
														
 
															-    BufAlign = std::max(BufAlign, Align);
														
 
															-    // Add padding required to keep the current arg aligned.
														
 
															-    BufSize = llvm::alignTo(BufSize, Align);
														
 
															-    BufSize += DL.getTypeAllocSize(Ty);
														
 
															-  }
														
 
															-
														
 
															-  // Construct and fill the buffer.
														
 
															-  llvm::Value* BufferPtr = nullptr;
														
 
															-  if (BufSize == 0) {
														
 
															+  // Construct and fill the args buffer that we'll pass to vprintf.
														
 
															+  llvm::Value *BufferPtr;
														
 
															+  if (Args.size() <= 1) {
														
 
															     // If there are no args, pass a null pointer to vprintf.
														
 
															     BufferPtr = llvm::ConstantPointerNull::get(llvm::Type::getInt8PtrTy(Ctx));
														
 
															   } else {
														
 
															-    BufferPtr = Builder.Insert(new llvm::AllocaInst(
														
 
															-        llvm::Type::getInt8Ty(Ctx), llvm::ConstantInt::get(Int32Ty, BufSize),
														
 
															-        BufAlign, "printf_arg_buf"));
														
 
															+    llvm::SmallVector<llvm::Type *, 8> ArgTypes;
														
 
															+    for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I)
														
 
															+      ArgTypes.push_back(Args[I].RV.getScalarVal()->getType());
														
 
															+    llvm::Type *AllocaTy = llvm::StructType::create(ArgTypes, "printf_args");
														
 
															+    llvm::Value *Alloca = CreateTempAlloca(AllocaTy);
														
 
															-    unsigned Offset = 0;
														
 
															     for (unsigned I = 1, NumArgs = Args.size(); I < NumArgs; ++I) {
														
 
															+      llvm::Value *P = Builder.CreateStructGEP(AllocaTy, Alloca, I - 1);
														
 
															       llvm::Value *Arg = Args[I].RV.getScalarVal();
														
 
															-      llvm::Type *Ty = Arg->getType();
														
 
															-      auto Align = DL.getPrefTypeAlignment(Ty);
														
 
															-
														
 
															-      // Pad the buffer to Arg's alignment.
														
 
															-      Offset = llvm::alignTo(Offset, Align);
														
 
															-
														
 
															-      // Store Arg into the buffer at Offset.
														
 
															-      llvm::Value *GEP =
														
 
															-          Builder.CreateGEP(BufferPtr, llvm::ConstantInt::get(Int32Ty, Offset));
														
 
															-      llvm::Value *Cast = Builder.CreateBitCast(GEP, Ty->getPointerTo());
														
 
															-      Builder.CreateAlignedStore(Arg, Cast, Align);
														
 
															-      Offset += DL.getTypeAllocSize(Ty);
														
 
															+      Builder.CreateAlignedStore(Arg, P, DL.getPrefTypeAlignment(Arg->getType()));
														
 
															     }
														
 
															+    BufferPtr = Builder.CreatePointerCast(Alloca, llvm::Type::getInt8PtrTy(Ctx));
														
 
															   }
														
 
															   // Invoke vprintf and return.
														
--- a/test/CodeGenCUDA/printf.cu
+++ b/test/CodeGenCUDA/printf.cu
@@ -9,45 +9,35 @@
 
															 extern "C" __device__ int vprintf(const char*, const char*);
														
 
															 // Check a simple call to printf end-to-end.
														
 
															+// CHECK: [[SIMPLE_PRINTF_TY:%[a-zA-Z0-9_]+]] = type { i32, i64, double }
														
 
															 __device__ int CheckSimple() {
														
 
															+  // CHECK: [[BUF:%[a-zA-Z0-9_]+]] = alloca [[SIMPLE_PRINTF_TY]]
														
 
															   // CHECK: [[FMT:%[0-9]+]] = load{{.*}}%fmt
														
 
															-  const char* fmt = "%d";
														
 
															-  // CHECK: [[BUF:%[a-zA-Z0-9_]+]] = alloca i8, i32 4, align 4
														
 
															-  // CHECK: [[PTR:%[0-9]+]] = getelementptr i8, i8* [[BUF]], i32 0
														
 
															-  // CHECK: [[CAST:%[0-9]+]] = bitcast i8* [[PTR]] to i32*
														
 
															-  // CHECK: store i32 42, i32* [[CAST]], align 4
														
 
															-  // CHECK: [[RET:%[0-9]+]] = call i32 @vprintf(i8* [[FMT]], i8* [[BUF]])
														
 
															+  const char* fmt = "%d %lld %f";
														
 
															+  // CHECK: [[PTR0:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], [[SIMPLE_PRINTF_TY]]* [[BUF]], i32 0, i32 0
														
 
															+  // CHECK: store i32 1, i32* [[PTR0]], align 4
														
 
															+  // CHECK: [[PTR1:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], [[SIMPLE_PRINTF_TY]]* [[BUF]], i32 0, i32 1
														
 
															+  // CHECK: store i64 2, i64* [[PTR1]], align 8
														
 
															+  // CHECK: [[PTR2:%[0-9]+]] = getelementptr inbounds [[SIMPLE_PRINTF_TY]], [[SIMPLE_PRINTF_TY]]* [[BUF]], i32 0, i32 2
														
 
															+  // CHECK: store double 3.0{{[^,]*}}, double* [[PTR2]], align 8
														
 
															+  // CHECK: [[BUF_CAST:%[0-9]+]] = bitcast [[SIMPLE_PRINTF_TY]]* [[BUF]] to i8*
														
 
															+  // CHECK: [[RET:%[0-9]+]] = call i32 @vprintf(i8* [[FMT]], i8* [[BUF_CAST]])
														
 
															   // CHECK: ret i32 [[RET]]
														
 
															-  return printf(fmt, 42);
														
 
															-}
														
 
															-
														
 
															-// Check that the args' types are promoted correctly when we call printf.
														
 
															-__device__ void CheckTypes() {
														
 
															-  // CHECK: alloca {{.*}} align 8
														
 
															-  // CHECK: getelementptr {{.*}} i32 0
														
 
															-  // CHECK: bitcast {{.*}} to i32*
														
 
															-  // CHECK: getelementptr {{.*}} i32 4
														
 
															-  // CHECK: bitcast {{.*}} to i32*
														
 
															-  // CHECK: getelementptr {{.*}} i32 8
														
 
															-  // CHECK: bitcast {{.*}} to double*
														
 
															-  // CHECK: getelementptr {{.*}} i32 16
														
 
															-  // CHECK: bitcast {{.*}} to double*
														
 
															-  printf("%d %d %f %f", (char)1, (short)2, 3.0f, 4.0);
														
 
															-}
														
 
															-
														
 
															-// Check that the args are aligned properly in the buffer.
														
 
															-__device__ void CheckAlign() {
														
 
															-  // CHECK: alloca i8, i32 40, align 8
														
 
															-  // CHECK: getelementptr {{.*}} i32 0
														
 
															-  // CHECK: getelementptr {{.*}} i32 8
														
 
															-  // CHECK: getelementptr {{.*}} i32 16
														
 
															-  // CHECK: getelementptr {{.*}} i32 20
														
 
															-  // CHECK: getelementptr {{.*}} i32 24
														
 
															-  // CHECK: getelementptr {{.*}} i32 32
														
 
															-  printf("%d %f %d %d %d %lld", 1, 2.0, 3, 4, 5, (long long)6);
														
 
															+  return printf(fmt, 1, 2ll, 3.0);
														
 
															 }
														
 
															 __device__ void CheckNoArgs() {
														
 
															   // CHECK: call i32 @vprintf({{.*}}, i8* null){{$}}
														
 
															   printf("hello, world!");
														
 
															 }
														
 
															+
														
 
															+// Check that printf's alloca happens in the entry block, not inside the if
														
 
															+// statement.
														
 
															+__device__ bool foo();
														
 
															+__device__ void CheckAllocaIsInEntryBlock() {
														
 
															+  // CHECK: alloca %printf_args
														
 
															+  // CHECK: call {{.*}} @_Z3foov()
														
 
															+  if (foo()) {
														
 
															+    printf("%d", 42);
														
 
															+  }
														
 
															+}