Jelajahi Sumber

[OPENMP] Emit sizes/init ptrs etc. data for task reductions before
using.

We may emit the code in wrong order because of incorrect implementation
of the runtime functions for task reductions. Threadprivate storages may
be initialized after real initialization of the reduction items. Patch
fixes this problem.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@327008 91177308-0d34-0410-b5e6-96231b3b80d8

Alexey Bataev 7 tahun lalu
induk
melakukan
f9b96cd2d9

+ 10 - 10
lib/CodeGen/CGStmtOpenMP.cpp

@@ -2886,6 +2886,11 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
       for (unsigned Cnt = 0, E = Data.ReductionVars.size(); Cnt < E; ++Cnt) {
         RedCG.emitSharedLValue(CGF, Cnt);
         RedCG.emitAggregateType(CGF, Cnt);
+        // FIXME: This must removed once the runtime library is fixed.
+        // Emit required threadprivate variables for
+        // initilizer/combiner/finalizer.
+        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
+                                                           RedCG, Cnt);
         Address Replacement = CGF.CGM.getOpenMPRuntime().getTaskReductionItem(
             CGF, S.getLocStart(), ReductionsPtr, RedCG.getSharedLValue(Cnt));
         Replacement =
@@ -2898,11 +2903,6 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
         Scope.addPrivate(RedCG.getBaseDecl(Cnt),
                          [Replacement]() { return Replacement; });
-        // FIXME: This must removed once the runtime library is fixed.
-        // Emit required threadprivate variables for
-        // initilizer/combiner/finalizer.
-        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
-                                                           RedCG, Cnt);
       }
     }
     // Privatize all private variables except for in_reduction items.
@@ -2935,6 +2935,11 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
         RedCG.emitAggregateType(CGF, Cnt);
         // The taskgroup descriptor variable is always implicit firstprivate and
         // privatized already during procoessing of the firstprivates.
+        // FIXME: This must removed once the runtime library is fixed.
+        // Emit required threadprivate variables for
+        // initilizer/combiner/finalizer.
+        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
+                                                           RedCG, Cnt);
         llvm::Value *ReductionsPtr =
             CGF.EmitLoadOfScalar(CGF.EmitLValue(TaskgroupDescriptors[Cnt]),
                                  TaskgroupDescriptors[Cnt]->getExprLoc());
@@ -2949,11 +2954,6 @@ void CodeGenFunction::EmitOMPTaskBasedDirective(
         Replacement = RedCG.adjustPrivateAddress(CGF, Cnt, Replacement);
         InRedScope.addPrivate(RedCG.getBaseDecl(Cnt),
                               [Replacement]() { return Replacement; });
-        // FIXME: This must removed once the runtime library is fixed.
-        // Emit required threadprivate variables for
-        // initilizer/combiner/finalizer.
-        CGF.CGM.getOpenMPRuntime().emitTaskReductionFixups(CGF, S.getLocStart(),
-                                                           RedCG, Cnt);
       }
     }
     (void)InRedScope.Privatize();

+ 2 - 0
test/OpenMP/task_in_reduction_codegen.cpp

@@ -78,9 +78,11 @@ int main(int argc, char **argv) {
 // CHECK-NEXT:  call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD1]], i8* [[A_PTR]])
 // CHECK:       [[D_REF:%.+]] = getelementptr inbounds %
 // CHECK-NEXT:  [[D_ADDR:%.+]] = load i16*, i16** [[D_REF]],
+// CHECK:       call i8* @__kmpc_threadprivate_cached(
 // CHECK:       [[TD2:%.+]] = load i8*, i8** [[TD2_ADDR]],
 // CHECK-NEXT:  [[D_PTR:%.+]] = bitcast i16* [[D_ADDR]] to i8*
 // CHECK-NEXT:  call i8* @__kmpc_task_reduction_get_th_data(i32 [[GTID]], i8* [[TD2]], i8* [[D_PTR]])
 // CHECK:       add nsw i32
 // CHECK:       store i32 %
+// CHECK-NOT:   call i8* @__kmpc_threadprivate_cached(
 #endif

+ 11 - 0
test/OpenMP/taskloop_reduction_codegen.cpp

@@ -199,6 +199,17 @@ sum = 0.0;
 // CHECK: store float %{{.+}}, float* %
 // CHECK: ret void
 
+// CHECK-NOT: call i8* @__kmpc_threadprivate_cached(
+// CHECK: call i8* @__kmpc_task_reduction_get_th_data(
+// CHECK: call i8* @__kmpc_threadprivate_cached(
+// CHECK: call i8* @__kmpc_threadprivate_cached(
+// CHECK: call i8* @__kmpc_task_reduction_get_th_data(
+// CHECK-NOT: call i8* @__kmpc_threadprivate_cached(
+// CHECK: call i8* @__kmpc_task_reduction_get_th_data(
+// CHECK: call i8* @__kmpc_threadprivate_cached(
+// CHECK: call i8* @__kmpc_task_reduction_get_th_data(
+// CHECK-NOT: call i8* @__kmpc_threadprivate_cached(
+
 // CHECK-DAG: distinct !DISubprogram(linkageName: "[[TASK]]", scope: !
 // CHECK-DAG: !DISubprogram(linkageName: "[[RED_INIT1]]"
 // CHECK-DAG: !DISubprogram(linkageName: "[[RED_COMB1]]"