Browse Source

[OpenCL][CodeGen] Fix replacing memcpy with addrspacecast

Summary:
If a function argument is byval and RV is located in default or alloca address space
an optimization of creating addrspacecast instead of memcpy is performed. That is
not correct for OpenCL, where that can lead to a situation of address space casting
from __private * to __global *. See an example below:

```
typedef struct {
  int x;
} MyStruct;

void foo(MyStruct val) {}

kernel void KernelOneMember(__global MyStruct* x) {
  foo (*x);
}
```

for this code clang generated following IR:
...
%0 = load %struct.MyStruct addrspace(1)*, %struct.MyStruct addrspace(1)**
%x.addr, align 4
%1 = addrspacecast %struct.MyStruct addrspace(1)* %0 to %struct.MyStruct*
...

So the optimization was disallowed for OpenCL if RV is located in an address space
different than that of the argument (0).


Reviewers: yaxunl, Anastasia

Reviewed By: Anastasia

Subscribers: cfe-commits, asavonic

Differential Revision: https://reviews.llvm.org/D54947

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@348752 91177308-0d34-0410-b5e6-96231b3b80d8
Andrew Savonichev 6 years ago
parent
commit
453d65eef0
2 changed files with 29 additions and 7 deletions
  1. 17 4
      lib/CodeGen/CGCall.cpp
  2. 12 3
      test/CodeGenOpenCL/addr-space-struct-arg.cl

+ 17 - 4
lib/CodeGen/CGCall.cpp

@@ -3958,15 +3958,28 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
         } else if (I->hasLValue()) {
         } else if (I->hasLValue()) {
           auto LV = I->getKnownLValue();
           auto LV = I->getKnownLValue();
           auto AS = LV.getAddressSpace();
           auto AS = LV.getAddressSpace();
+
           if ((!ArgInfo.getIndirectByVal() &&
           if ((!ArgInfo.getIndirectByVal() &&
                (LV.getAlignment() >=
                (LV.getAlignment() >=
-                getContext().getTypeAlignInChars(I->Ty))) ||
-              (ArgInfo.getIndirectByVal() &&
-               ((AS != LangAS::Default && AS != LangAS::opencl_private &&
-                 AS != CGM.getASTAllocaAddressSpace())))) {
+                getContext().getTypeAlignInChars(I->Ty)))) {
+            NeedCopy = true;
+          }
+          if (!getLangOpts().OpenCL) {
+            if ((ArgInfo.getIndirectByVal() &&
+                (AS != LangAS::Default &&
+                 AS != CGM.getASTAllocaAddressSpace()))) {
+              NeedCopy = true;
+            }
+          }
+          // For OpenCL even if RV is located in default or alloca address space
+          // we don't want to perform address space cast for it.
+          else if ((ArgInfo.getIndirectByVal() &&
+                    Addr.getType()->getAddressSpace() != IRFuncTy->
+                      getParamType(FirstIRArg)->getPointerAddressSpace())) {
             NeedCopy = true;
             NeedCopy = true;
           }
           }
         }
         }
+
         if (NeedCopy) {
         if (NeedCopy) {
           // Create an aligned temporary, and copy to it.
           // Create an aligned temporary, and copy to it.
           Address AI = CreateMemTempWithoutCast(
           Address AI = CreateMemTempWithoutCast(

+ 12 - 3
test/CodeGenOpenCL/addr-space-struct-arg.cl

@@ -1,6 +1,9 @@
-// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -ffake-address-space-map -triple i686-pc-darwin | FileCheck -enable-var-scope -check-prefixes=COM,X86 %s
-// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -finclude-default-header -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN %s
-// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -finclude-default-header -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN,AMDGCN20 %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -ffake-address-space-map -triple i686-pc-darwin | FileCheck -enable-var-scope -check-prefixes=COM,X86 %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL2.0 -O0 -triple amdgcn | FileCheck -enable-var-scope -check-prefixes=COM,AMDGCN,AMDGCN20 %s
+// RUN: %clang_cc1 %s -emit-llvm -o - -cl-std=CL1.2 -O0 -triple spir-unknown-unknown-unknown | FileCheck -enable-var-scope -check-prefixes=SPIR %s
+
+typedef int int2 __attribute__((ext_vector_type(2)));
 
 
 typedef struct {
 typedef struct {
   int cells[9];
   int cells[9];
@@ -130,6 +133,12 @@ kernel void KernelOneMember(struct StructOneMember u) {
   FuncOneMember(u);
   FuncOneMember(u);
 }
 }
 
 
+// SPIR: call void @llvm.memcpy.p0i8.p1i8.i32
+// SPIR-NOT: addrspacecast
+kernel void KernelOneMemberSpir(global struct StructOneMember* u) {
+  FuncOneMember(*u);
+}
+
 // AMDGCN-LABEL: define amdgpu_kernel void @KernelLargeOneMember(
 // AMDGCN-LABEL: define amdgpu_kernel void @KernelLargeOneMember(
 // AMDGCN:  %[[U:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
 // AMDGCN:  %[[U:.*]] = alloca %struct.LargeStructOneMember, align 8, addrspace(5)
 // AMDGCN:  store %struct.LargeStructOneMember %u.coerce, %struct.LargeStructOneMember addrspace(5)* %[[U]], align 8
 // AMDGCN:  store %struct.LargeStructOneMember %u.coerce, %struct.LargeStructOneMember addrspace(5)* %[[U]], align 8