Преглед на файлове

[CUDA] Change initializer for CUDA device code based on CUDA documentation.

Summary:
According to CUDA documentation, global variables declared with __device__,
__constant__ can be initialized from host code, so mark them as
externally initialized. Because __shared__ variables cannot have an
initialization as part of their declaration and since the value maybe kept
across different kernel invocation, the value of __shared__ is effectively
undefined instead of zero initialized.

Wrongly using zero initializer may cause illegitimate optimization, e.g.
removing unused __constant__ variable because it's not updated in the device
code and the value is initialized with zero.

Test Plan: test/CodeGenCUDA/address-spaces.cu

Patch by Xuetian Weng

Reviewers: jholewinski, eliben, tra, jingyue

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D12241

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@245786 91177308-0d34-0410-b5e6-96231b3b80d8
Jingyue Wu преди 10 години
родител
ревизия
e0848b6353
променени са 3 файла, в които са добавени 28 реда и са изтрити 8 реда
  1. 21 1
      lib/CodeGen/CodeGenModule.cpp
  2. 3 3
      test/CodeGenCUDA/address-spaces.cu
  3. 4 4
      test/CodeGenCUDA/filter-decl.cu

+ 21 - 1
lib/CodeGen/CodeGenModule.cpp

@@ -1990,7 +1990,16 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) {
   const VarDecl *InitDecl;
   const Expr *InitExpr = D->getAnyInitializer(InitDecl);
 
-  if (!InitExpr) {
+  // CUDA E.2.4.1 "__shared__ variables cannot have an initialization as part
+  // of their declaration."
+  if (getLangOpts().CPlusPlus && getLangOpts().CUDAIsDevice
+      && D->hasAttr<CUDASharedAttr>()) {
+    if (InitExpr) {
+      Error(D->getLocation(),
+            "__shared__ variable cannot have an initialization.");
+    }
+    Init = llvm::UndefValue::get(getTypes().ConvertType(ASTTy));
+  } else if (!InitExpr) {
     // This is a tentative definition; tentative definitions are
     // implicitly initialized with { 0 }.
     //
@@ -2076,6 +2085,17 @@ void CodeGenModule::EmitGlobalVarDefinition(const VarDecl *D) {
   if (D->hasAttr<AnnotateAttr>())
     AddGlobalAnnotations(D, GV);
 
+  // CUDA B.2.1 "The __device__ qualifier declares a variable that resides on
+  // the device. [...]"
+  // CUDA B.2.2 "The __constant__ qualifier, optionally used together with
+  // __device__, declares a variable that: [...]
+  // Is accessible from all the threads within the grid and from the host
+  // through the runtime library (cudaGetSymbolAddress() / cudaGetSymbolSize()
+  // / cudaMemcpyToSymbol() / cudaMemcpyFromSymbol())."
+  if (GV && LangOpts.CUDA && LangOpts.CUDAIsDevice &&
+      (D->hasAttr<CUDAConstantAttr>() || D->hasAttr<CUDADeviceAttr>())) {
+    GV->setExternallyInitialized(true);
+  }
   GV->setInitializer(Init);
 
   // If it is safe to mark the global 'constant', do so now.

+ 3 - 3
test/CodeGenCUDA/address-spaces.cu

@@ -5,10 +5,10 @@
 
 #include "Inputs/cuda.h"
 
-// CHECK: @i = addrspace(1) global
+// CHECK: @i = addrspace(1) externally_initialized global
 __device__ int i;
 
-// CHECK: @j = addrspace(4) global
+// CHECK: @j = addrspace(4) externally_initialized global
 __constant__ int j;
 
 // CHECK: @k = addrspace(3) global
@@ -24,7 +24,7 @@ struct MyStruct {
 // CHECK: @_ZZ5func2vE1a = internal addrspace(3) global [256 x float] zeroinitializer
 // CHECK: @_ZZ5func3vE1a = internal addrspace(3) global float 0.000000e+00
 // CHECK: @_ZZ5func4vE1a = internal addrspace(3) global float 0.000000e+00
-// CHECK: @b = addrspace(3) global float 0.000000e+00
+// CHECK: @b = addrspace(3) global float undef
 
 __device__ void foo() {
   // CHECK: load i32, i32* addrspacecast (i32 addrspace(1)* @i to i32*)

+ 4 - 4
test/CodeGenCUDA/filter-decl.cu

@@ -9,12 +9,12 @@
 // CHECK-DEVICE-NOT: module asm "file scope asm is host only"
 __asm__("file scope asm is host only");
 
-// CHECK-HOST-NOT: constantdata = global
-// CHECK-DEVICE: constantdata = global
+// CHECK-HOST-NOT: constantdata = externally_initialized global
+// CHECK-DEVICE: constantdata = externally_initialized global
 __constant__ char constantdata[256];
 
-// CHECK-HOST-NOT: devicedata = global
-// CHECK-DEVICE: devicedata = global
+// CHECK-HOST-NOT: devicedata = externally_initialized global
+// CHECK-DEVICE: devicedata = externally_initialized global
 __device__ char devicedata[256];
 
 // CHECK-HOST-NOT: shareddata = global