Browse Source

[HIP-Clang] Fat binary should not be produced for non-GPU code

Skip producing the fat binary functions for HIP when no device code is present.

Reviewers: yaxunl

Differential Review: https://reviews.llvm.org/D60141



git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@357520 91177308-0d34-0410-b5e6-96231b3b80d8
Aaron Enye Shi 6 years ago
parent
commit
85d2220c2a
2 changed files with 13 additions and 5 deletions
  1. 2 0
      lib/CodeGen/CGCUDANV.cpp
  2. 11 5
      test/CodeGenCUDA/device-stub.cu

+ 2 - 0
lib/CodeGen/CGCUDANV.cpp

@@ -472,6 +472,8 @@ llvm::Function *CGNVCUDARuntime::makeModuleCtorFunction() {
   StringRef CudaGpuBinaryFileName = CGM.getCodeGenOpts().CudaGpuBinaryFileName;
   StringRef CudaGpuBinaryFileName = CGM.getCodeGenOpts().CudaGpuBinaryFileName;
   if (CudaGpuBinaryFileName.empty() && !IsHIP)
   if (CudaGpuBinaryFileName.empty() && !IsHIP)
     return nullptr;
     return nullptr;
+  if (IsHIP && EmittedKernels.empty() && DeviceVars.empty())
+    return nullptr;
 
 
   // void __{cuda|hip}_register_globals(void* handle);
   // void __{cuda|hip}_register_globals(void* handle);
   llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn();
   llvm::Function *RegisterGlobalsFunc = makeRegisterGlobalsFn();

+ 11 - 5
test/CodeGenCUDA/device-stub.cu

@@ -228,13 +228,19 @@ void hostfunc(void) { kernelfunc<<<1, 1>>>(1, 1, 1); }
 // device-side globals, but we still need to register GPU binary.
 // device-side globals, but we still need to register GPU binary.
 // Skip GPU binary string first.
 // Skip GPU binary string first.
 // CUDANOGLOBALS: @{{.*}} = private constant{{.*}}
 // CUDANOGLOBALS: @{{.*}} = private constant{{.*}}
-// HIPNOGLOBALS: @{{.*}} = internal constant{{.*}}
+// HIPNOGLOBALS-NOT: @{{.*}} = internal constant{{.*}}
 // NOGLOBALS-NOT: define internal void @__{{.*}}_register_globals
 // NOGLOBALS-NOT: define internal void @__{{.*}}_register_globals
-// NOGLOBALS: define internal void @__[[PREFIX:cuda|hip]]_module_ctor
-// NOGLOBALS: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
+// CUDANOGLOBALS: define internal void @__[[PREFIX:cuda|hip]]_module_ctor
+// CUDANOGLOBALS: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
 // NOGLOBALS-NOT: call void @__[[PREFIX]]_register_globals
 // NOGLOBALS-NOT: call void @__[[PREFIX]]_register_globals
-// NOGLOBALS: define internal void @__[[PREFIX]]_module_dtor
-// NOGLOBALS: call void @__[[PREFIX]]UnregisterFatBinary
+// CUDANOGLOBALS: define internal void @__[[PREFIX]]_module_dtor
+// CUDANOGLOBALS: call void @__[[PREFIX]]UnregisterFatBinary
+
+// There should be no fat binary functions when no device code is found for HIP.
+// HIPNOGLOBALS-NOT: define internal void @__[[PREFIX:cuda|hip]]_module_ctor
+// HIPNOGLOBALS-NOT: call{{.*}}[[PREFIX]]RegisterFatBinary{{.*}}__[[PREFIX]]_fatbin_wrapper
+// HIPNOGLOBALS-NOT: define internal void @__[[PREFIX]]_module_dtor
+// HIPNOGLOBALS-NOT: call void @__[[PREFIX]]UnregisterFatBinary
 
 
 // There should be no constructors/destructors if we have no GPU binary.
 // There should be no constructors/destructors if we have no GPU binary.
 // NOGPUBIN-NOT: define internal void @__[[PREFIX]]_register_globals
 // NOGPUBIN-NOT: define internal void @__[[PREFIX]]_register_globals