|
@@ -233,9 +233,8 @@ public:
|
|
|
|
|
|
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
|
|
|
return CGF.MakeNaturalAlignAddrLValue(
|
|
|
- CGF.Builder.CreateAlignedLoad(
|
|
|
- CGF.GetAddrOfLocalVar(getThreadIDVariable()),
|
|
|
- CGF.PointerAlignInBytes),
|
|
|
+ CGF.Builder.CreateLoad(
|
|
|
+ CGF.GetAddrOfLocalVar(getThreadIDVariable())),
|
|
|
getThreadIDVariable()
|
|
|
->getType()
|
|
|
->castAs<PointerType>()
|
|
@@ -258,7 +257,7 @@ void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt * /*S*/) {
|
|
|
|
|
|
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
|
|
|
CodeGenFunction &CGF) {
|
|
|
- return CGF.MakeNaturalAlignAddrLValue(
|
|
|
+ return CGF.MakeAddrLValue(
|
|
|
CGF.GetAddrOfLocalVar(getThreadIDVariable()),
|
|
|
getThreadIDVariable()->getType());
|
|
|
}
|
|
@@ -280,6 +279,25 @@ void CGOpenMPRuntime::clear() {
|
|
|
InternalVars.clear();
|
|
|
}
|
|
|
|
|
|
+// Layout information for ident_t.
|
|
|
+static CharUnits getIdentAlign(CodeGenModule &CGM) { // Alignment used for ident_t objects.
|
|
|
+ return CGM.getPointerAlign(); // ident_t is given the module's pointer alignment.
|
|
|
+}
|
|
|
+static CharUnits getIdentSize(CodeGenModule &CGM) { // Size in bytes used for ident_t.
|
|
|
+ assert((4 * CGM.getPointerSize()).isMultipleOf(CGM.getPointerAlign())); // NOTE(review): checks 4 * pointer size; the size below uses a fixed 16 bytes -- confirm intent.
|
|
|
+ return CharUnits::fromQuantity(16) + CGM.getPointerSize(); // 16 bytes plus the size of one pointer.
|
|
|
+}
|
|
|
+static CharUnits getOffsetOfIdentField(CGOpenMPRuntime::IdentFieldIndex Field) { // Byte offset of an ident_t field, computed from its index.
|
|
|
+ // All the fields except the last are i32, so this works beautifully.
|
|
|
+ return unsigned(Field) * CharUnits::fromQuantity(4); // Field index times 4 bytes.
|
|
|
+}
|
|
|
+static Address createIdentFieldGEP(CodeGenFunction &CGF, Address Addr, // Emits a struct GEP for one ident_t field of Addr.
|
|
|
+ CGOpenMPRuntime::IdentFieldIndex Field,
|
|
|
+ const llvm::Twine &Name = "") {
|
|
|
+ auto Offset = getOffsetOfIdentField(Field); // Byte offset of the field, passed along with its index below.
|
|
|
+ return CGF.Builder.CreateStructGEP(Addr, Field, Offset, Name);
|
|
|
+}
|
|
|
+
|
|
|
llvm::Value *CGOpenMPRuntime::emitParallelOutlinedFunction(
|
|
|
const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
|
|
|
OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
|
|
@@ -305,8 +323,8 @@ llvm::Value *CGOpenMPRuntime::emitTaskOutlinedFunction(
|
|
|
return CGF.GenerateCapturedStmtFunction(*CS);
|
|
|
}
|
|
|
|
|
|
-llvm::Value *
|
|
|
-CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
|
|
|
+Address CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
|
|
|
+ CharUnits Align = getIdentAlign(CGM);
|
|
|
llvm::Value *Entry = OpenMPDefaultLocMap.lookup(Flags);
|
|
|
if (!Entry) {
|
|
|
if (!DefaultOpenMPPSource) {
|
|
@@ -315,7 +333,7 @@ CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
|
|
|
// Taken from
|
|
|
// http://llvm.org/svn/llvm-project/openmp/trunk/runtime/src/kmp_str.c
|
|
|
DefaultOpenMPPSource =
|
|
|
- CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;");
|
|
|
+ CGM.GetAddrOfConstantCString(";unknown;unknown;0;0;;").getPointer();
|
|
|
DefaultOpenMPPSource =
|
|
|
llvm::ConstantExpr::getBitCast(DefaultOpenMPPSource, CGM.Int8PtrTy);
|
|
|
}
|
|
@@ -323,6 +341,7 @@ CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
|
|
|
CGM.getModule(), IdentTy, /*isConstant*/ true,
|
|
|
llvm::GlobalValue::PrivateLinkage, /*Initializer*/ nullptr);
|
|
|
DefaultOpenMPLocation->setUnnamedAddr(true);
|
|
|
+ DefaultOpenMPLocation->setAlignment(Align.getQuantity());
|
|
|
|
|
|
llvm::Constant *Zero = llvm::ConstantInt::get(CGM.Int32Ty, 0, true);
|
|
|
llvm::Constant *Values[] = {Zero,
|
|
@@ -330,10 +349,9 @@ CGOpenMPRuntime::getOrCreateDefaultLocation(OpenMPLocationFlags Flags) {
|
|
|
Zero, Zero, DefaultOpenMPPSource};
|
|
|
llvm::Constant *Init = llvm::ConstantStruct::get(IdentTy, Values);
|
|
|
DefaultOpenMPLocation->setInitializer(Init);
|
|
|
- OpenMPDefaultLocMap[Flags] = DefaultOpenMPLocation;
|
|
|
- return DefaultOpenMPLocation;
|
|
|
+ OpenMPDefaultLocMap[Flags] = Entry = DefaultOpenMPLocation;
|
|
|
}
|
|
|
- return Entry;
|
|
|
+ return Address(Entry, Align);
|
|
|
}
|
|
|
|
|
|
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
|
|
@@ -342,34 +360,33 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
|
|
|
// If no debug info is generated - return global default location.
|
|
|
if (CGM.getCodeGenOpts().getDebugInfo() == CodeGenOptions::NoDebugInfo ||
|
|
|
Loc.isInvalid())
|
|
|
- return getOrCreateDefaultLocation(Flags);
|
|
|
+ return getOrCreateDefaultLocation(Flags).getPointer();
|
|
|
|
|
|
assert(CGF.CurFn && "No function in current CodeGenFunction.");
|
|
|
|
|
|
- llvm::Value *LocValue = nullptr;
|
|
|
+ Address LocValue = Address::invalid();
|
|
|
auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
|
|
|
if (I != OpenMPLocThreadIDMap.end())
|
|
|
- LocValue = I->second.DebugLoc;
|
|
|
+ LocValue = Address(I->second.DebugLoc, getIdentAlign(CGF.CGM));
|
|
|
+
|
|
|
// OpenMPLocThreadIDMap may have null DebugLoc and non-null ThreadID, if
|
|
|
// GetOpenMPThreadID was called before this routine.
|
|
|
- if (LocValue == nullptr) {
|
|
|
+ if (!LocValue.isValid()) {
|
|
|
// Generate "ident_t .kmpc_loc.addr;"
|
|
|
- llvm::AllocaInst *AI = CGF.CreateTempAlloca(IdentTy, ".kmpc_loc.addr");
|
|
|
- AI->setAlignment(CGM.getDataLayout().getPrefTypeAlignment(IdentTy));
|
|
|
+ Address AI = CGF.CreateTempAlloca(IdentTy, getIdentAlign(CGF.CGM),
|
|
|
+ ".kmpc_loc.addr");
|
|
|
auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
|
|
|
- Elem.second.DebugLoc = AI;
|
|
|
+ Elem.second.DebugLoc = AI.getPointer();
|
|
|
LocValue = AI;
|
|
|
|
|
|
CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
|
|
|
CGF.Builder.SetInsertPoint(CGF.AllocaInsertPt);
|
|
|
CGF.Builder.CreateMemCpy(LocValue, getOrCreateDefaultLocation(Flags),
|
|
|
- llvm::ConstantExpr::getSizeOf(IdentTy),
|
|
|
- CGM.PointerAlignInBytes);
|
|
|
+ CGM.getSize(getIdentSize(CGF.CGM)));
|
|
|
}
|
|
|
|
|
|
// char **psource = &.kmpc_loc_<flags>.addr.psource;
|
|
|
- auto *PSource = CGF.Builder.CreateConstInBoundsGEP2_32(IdentTy, LocValue, 0,
|
|
|
- IdentField_PSource);
|
|
|
+ Address PSource = createIdentFieldGEP(CGF, LocValue, IdentField_PSource);
|
|
|
|
|
|
auto OMPDebugLoc = OpenMPDebugLocMap.lookup(Loc.getRawEncoding());
|
|
|
if (OMPDebugLoc == nullptr) {
|
|
@@ -389,7 +406,9 @@ llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
|
|
|
// *psource = ";<File>;<Function>;<Line>;<Column>;;";
|
|
|
CGF.Builder.CreateStore(OMPDebugLoc, PSource);
|
|
|
|
|
|
- return LocValue;
|
|
|
+ // Our callers always pass this to a runtime function, so for
|
|
|
+ // convenience, go ahead and return a naked pointer.
|
|
|
+ return LocValue.getPointer();
|
|
|
}
|
|
|
|
|
|
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
|
|
@@ -939,25 +958,27 @@ CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
|
|
|
Twine(CGM.getMangledName(VD)) + ".cache.");
|
|
|
}
|
|
|
|
|
|
-llvm::Value *CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
|
|
|
- const VarDecl *VD,
|
|
|
- llvm::Value *VDAddr,
|
|
|
- SourceLocation Loc) {
|
|
|
+Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
|
|
|
+ const VarDecl *VD,
|
|
|
+ Address VDAddr,
|
|
|
+ SourceLocation Loc) {
|
|
|
if (CGM.getLangOpts().OpenMPUseTLS &&
|
|
|
CGM.getContext().getTargetInfo().isTLSSupported())
|
|
|
return VDAddr;
|
|
|
|
|
|
- auto VarTy = VDAddr->getType()->getPointerElementType();
|
|
|
+ auto VarTy = VDAddr.getElementType();
|
|
|
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
|
|
|
- CGF.Builder.CreatePointerCast(VDAddr, CGM.Int8PtrTy),
|
|
|
+ CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
|
|
|
+ CGM.Int8PtrTy),
|
|
|
CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
|
|
|
getOrCreateThreadPrivateCache(VD)};
|
|
|
- return CGF.EmitRuntimeCall(
|
|
|
- createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args);
|
|
|
+ return Address(CGF.EmitRuntimeCall(
|
|
|
+ createRuntimeFunction(OMPRTL__kmpc_threadprivate_cached), Args),
|
|
|
+ VDAddr.getAlignment());
|
|
|
}
|
|
|
|
|
|
void CGOpenMPRuntime::emitThreadPrivateVarInit(
|
|
|
- CodeGenFunction &CGF, llvm::Value *VDAddr, llvm::Value *Ctor,
|
|
|
+ CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
|
|
|
llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
|
|
|
// Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
|
|
|
// library.
|
|
@@ -967,14 +988,15 @@ void CGOpenMPRuntime::emitThreadPrivateVarInit(
|
|
|
// Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
|
|
|
// to register constructor/destructor for variable.
|
|
|
llvm::Value *Args[] = {OMPLoc,
|
|
|
- CGF.Builder.CreatePointerCast(VDAddr, CGM.VoidPtrTy),
|
|
|
+ CGF.Builder.CreatePointerCast(VDAddr.getPointer(),
|
|
|
+ CGM.VoidPtrTy),
|
|
|
Ctor, CopyCtor, Dtor};
|
|
|
CGF.EmitRuntimeCall(
|
|
|
createRuntimeFunction(OMPRTL__kmpc_threadprivate_register), Args);
|
|
|
}
|
|
|
|
|
|
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
|
|
|
- const VarDecl *VD, llvm::Value *VDAddr, SourceLocation Loc,
|
|
|
+ const VarDecl *VD, Address VDAddr, SourceLocation Loc,
|
|
|
bool PerformInit, CodeGenFunction *CGF) {
|
|
|
if (CGM.getLangOpts().OpenMPUseTLS &&
|
|
|
CGM.getContext().getTargetInfo().isTLSSupported())
|
|
@@ -1005,17 +1027,15 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
|
|
|
CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
|
|
|
Args, SourceLocation());
|
|
|
auto ArgVal = CtorCGF.EmitLoadOfScalar(
|
|
|
- CtorCGF.GetAddrOfLocalVar(&Dst),
|
|
|
- /*Volatile=*/false, CGM.PointerAlignInBytes,
|
|
|
+ CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
|
|
|
CGM.getContext().VoidPtrTy, Dst.getLocation());
|
|
|
- auto Arg = CtorCGF.Builder.CreatePointerCast(
|
|
|
- ArgVal,
|
|
|
- CtorCGF.ConvertTypeForMem(CGM.getContext().getPointerType(ASTTy)));
|
|
|
+ Address Arg = Address(ArgVal, VDAddr.getAlignment());
|
|
|
+ Arg = CtorCGF.Builder.CreateElementBitCast(Arg,
|
|
|
+ CtorCGF.ConvertTypeForMem(ASTTy));
|
|
|
CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
|
|
|
/*IsInitializer=*/true);
|
|
|
ArgVal = CtorCGF.EmitLoadOfScalar(
|
|
|
- CtorCGF.GetAddrOfLocalVar(&Dst),
|
|
|
- /*Volatile=*/false, CGM.PointerAlignInBytes,
|
|
|
+ CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
|
|
|
CGM.getContext().VoidPtrTy, Dst.getLocation());
|
|
|
CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
|
|
|
CtorCGF.FinishFunction();
|
|
@@ -1040,9 +1060,8 @@ llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
|
|
|
SourceLocation());
|
|
|
auto ArgVal = DtorCGF.EmitLoadOfScalar(
|
|
|
DtorCGF.GetAddrOfLocalVar(&Dst),
|
|
|
- /*Volatile=*/false, CGM.PointerAlignInBytes,
|
|
|
- CGM.getContext().VoidPtrTy, Dst.getLocation());
|
|
|
- DtorCGF.emitDestroy(ArgVal, ASTTy,
|
|
|
+ /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
|
|
|
+ DtorCGF.emitDestroy(Address(ArgVal, VDAddr.getAlignment()), ASTTy,
|
|
|
DtorCGF.getDestroyer(ASTTy.isDestructedType()),
|
|
|
DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
|
|
|
DtorCGF.FinishFunction();
|
|
@@ -1149,7 +1168,7 @@ static void emitOMPIfClause(CodeGenFunction &CGF, const Expr *Cond,
|
|
|
|
|
|
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
|
|
|
llvm::Value *OutlinedFn,
|
|
|
- llvm::Value *CapturedStruct,
|
|
|
+ Address CapturedStruct,
|
|
|
const Expr *IfCond) {
|
|
|
auto *RTLoc = emitUpdateLocation(CGF, Loc);
|
|
|
auto &&ThenGen =
|
|
@@ -1162,7 +1181,7 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
|
|
|
1), // Number of arguments after 'microtask' argument
|
|
|
// (there is only one additional argument - 'context')
|
|
|
CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy()),
|
|
|
- CGF.EmitCastToVoidPtr(CapturedStruct)};
|
|
|
+ CGF.EmitCastToVoidPtr(CapturedStruct.getPointer())};
|
|
|
auto RTLFn = createRuntimeFunction(OMPRTL__kmpc_fork_call);
|
|
|
CGF.EmitRuntimeCall(RTLFn, Args);
|
|
|
};
|
|
@@ -1177,11 +1196,15 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
|
|
|
|
|
|
// OutlinedFn(&gtid, &zero, CapturedStruct);
|
|
|
auto ThreadIDAddr = emitThreadIDAddress(CGF, Loc);
|
|
|
- auto Int32Ty = CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32,
|
|
|
- /*Signed*/ true);
|
|
|
- auto ZeroAddr = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".zero.addr");
|
|
|
+ Address ZeroAddr =
|
|
|
+ CGF.CreateTempAlloca(CGF.Int32Ty, CharUnits::fromQuantity(4),
|
|
|
+ /*Name*/ ".zero.addr");
|
|
|
CGF.InitTempAlloca(ZeroAddr, CGF.Builder.getInt32(/*C*/ 0));
|
|
|
- llvm::Value *OutlinedFnArgs[] = {ThreadIDAddr, ZeroAddr, CapturedStruct};
|
|
|
+ llvm::Value *OutlinedFnArgs[] = {
|
|
|
+ ThreadIDAddr.getPointer(),
|
|
|
+ ZeroAddr.getPointer(),
|
|
|
+ CapturedStruct.getPointer()
|
|
|
+ };
|
|
|
CGF.EmitCallOrInvoke(OutlinedFn, OutlinedFnArgs);
|
|
|
|
|
|
// __kmpc_end_serialized_parallel(&Loc, GTid);
|
|
@@ -1203,8 +1226,8 @@ void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
|
|
|
// regular serial code region, get thread ID by calling kmp_int32
|
|
|
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
|
|
|
// return the address of that temp.
|
|
|
-llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
|
|
|
- SourceLocation Loc) {
|
|
|
+Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
|
|
|
+ SourceLocation Loc) {
|
|
|
if (auto OMPRegionInfo =
|
|
|
dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
|
|
|
if (OMPRegionInfo->getThreadIDVariable())
|
|
@@ -1215,7 +1238,7 @@ llvm::Value *CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
|
|
|
CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
|
|
|
auto ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
|
|
|
CGF.EmitStoreOfScalar(ThreadID,
|
|
|
- CGF.MakeNaturalAlignAddrLValue(ThreadIDTemp, Int32Ty));
|
|
|
+ CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));
|
|
|
|
|
|
return ThreadIDTemp;
|
|
|
}
|
|
@@ -1353,6 +1376,22 @@ void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+/// Given an array of pointers to variables, project the address of a
|
|
|
+/// given variable.
|
|
|
+static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, // Returns the address stored in slot Index of Array, typed for Var.
|
|
|
+ Address Array, unsigned Index,
|
|
|
+ const VarDecl *Var) {
|
|
|
+ // Pull out the pointer to the variable.
|
|
|
+ Address PtrAddr =
|
|
|
+ CGF.Builder.CreateConstArrayGEP(Array, Index, CGF.getPointerSize()); // Slot Index; the element size passed is the pointer size.
|
|
|
+ llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);
|
|
|
+
|
|
|
+ Address Addr = Address(Ptr, CGF.getContext().getDeclAlign(Var)); // Pair the loaded pointer with Var's declared alignment.
|
|
|
+ Addr = CGF.Builder.CreateElementBitCast(Addr,
|
|
|
+ CGF.ConvertTypeForMem(Var->getType())); // Retype the element as Var's in-memory type.
|
|
|
+ return Addr;
|
|
|
+}
|
|
|
+
|
|
|
static llvm::Value *emitCopyprivateCopyFunction(
|
|
|
CodeGenModule &CGM, llvm::Type *ArgsType,
|
|
|
ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
|
|
@@ -1377,35 +1416,26 @@ static llvm::Value *emitCopyprivateCopyFunction(
|
|
|
CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args);
|
|
|
// Dest = (void*[n])(LHSArg);
|
|
|
// Src = (void*[n])(RHSArg);
|
|
|
- auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
- CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
|
|
|
- CGF.PointerAlignInBytes),
|
|
|
- ArgsType);
|
|
|
- auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
- CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
|
|
|
- CGF.PointerAlignInBytes),
|
|
|
- ArgsType);
|
|
|
+ Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
+ CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
|
|
|
+ ArgsType), CGF.getPointerAlign());
|
|
|
+ Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
+ CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
|
|
|
+ ArgsType), CGF.getPointerAlign());
|
|
|
// *(Type0*)Dst[0] = *(Type0*)Src[0];
|
|
|
// *(Type1*)Dst[1] = *(Type1*)Src[1];
|
|
|
// ...
|
|
|
// *(Typen*)Dst[n] = *(Typen*)Src[n];
|
|
|
for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
|
|
|
- auto *DestAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
- CGF.Builder.CreateAlignedLoad(
|
|
|
- CGF.Builder.CreateStructGEP(nullptr, LHS, I),
|
|
|
- CGM.PointerAlignInBytes),
|
|
|
- CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
|
|
|
- auto *SrcAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
- CGF.Builder.CreateAlignedLoad(
|
|
|
- CGF.Builder.CreateStructGEP(nullptr, RHS, I),
|
|
|
- CGM.PointerAlignInBytes),
|
|
|
- CGF.ConvertTypeForMem(C.getPointerType(SrcExprs[I]->getType())));
|
|
|
+ auto DestVar = cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
|
|
|
+ Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);
|
|
|
+
|
|
|
+ auto SrcVar = cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
|
|
|
+ Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);
|
|
|
+
|
|
|
auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
|
|
|
QualType Type = VD->getType();
|
|
|
- CGF.EmitOMPCopy(CGF, Type, DestAddr, SrcAddr,
|
|
|
- cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl()),
|
|
|
- cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl()),
|
|
|
- AssignmentOps[I]);
|
|
|
+ CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
|
|
|
}
|
|
|
CGF.FinishFunction();
|
|
|
return Fn;
|
|
@@ -1431,13 +1461,12 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
|
|
|
// call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
|
|
|
// <copy_func>, did_it);
|
|
|
|
|
|
- llvm::AllocaInst *DidIt = nullptr;
|
|
|
+ Address DidIt = Address::invalid();
|
|
|
if (!CopyprivateVars.empty()) {
|
|
|
// int32 did_it = 0;
|
|
|
auto KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
|
|
|
DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
|
|
|
- CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(0), DidIt,
|
|
|
- DidIt->getAlignment());
|
|
|
+ CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
|
|
|
}
|
|
|
// Prepare arguments and build a call to __kmpc_single
|
|
|
llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
|
|
@@ -1452,29 +1481,28 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
|
|
|
NormalAndEHCleanup, createRuntimeFunction(OMPRTL__kmpc_end_single),
|
|
|
llvm::makeArrayRef(Args));
|
|
|
SingleOpGen(CGF);
|
|
|
- if (DidIt) {
|
|
|
+ if (DidIt.isValid()) {
|
|
|
// did_it = 1;
|
|
|
- CGF.Builder.CreateAlignedStore(CGF.Builder.getInt32(1), DidIt,
|
|
|
- DidIt->getAlignment());
|
|
|
+ CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
|
|
|
}
|
|
|
});
|
|
|
// call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
|
|
|
// <copy_func>, did_it);
|
|
|
- if (DidIt) {
|
|
|
+ if (DidIt.isValid()) {
|
|
|
llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
|
|
|
auto CopyprivateArrayTy =
|
|
|
C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
|
|
|
/*IndexTypeQuals=*/0);
|
|
|
// Create a list of all private variables for copyprivate.
|
|
|
- auto *CopyprivateList =
|
|
|
+ Address CopyprivateList =
|
|
|
CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
|
|
|
for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
|
|
|
- auto *Elem = CGF.Builder.CreateStructGEP(
|
|
|
- CopyprivateList->getAllocatedType(), CopyprivateList, I);
|
|
|
- CGF.Builder.CreateAlignedStore(
|
|
|
+ Address Elem = CGF.Builder.CreateConstArrayGEP(
|
|
|
+ CopyprivateList, I, CGF.getPointerSize());
|
|
|
+ CGF.Builder.CreateStore(
|
|
|
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
- CGF.EmitLValue(CopyprivateVars[I]).getAddress(), CGF.VoidPtrTy),
|
|
|
- Elem, CGM.PointerAlignInBytes);
|
|
|
+ CGF.EmitLValue(CopyprivateVars[I]).getPointer(), CGF.VoidPtrTy),
|
|
|
+ Elem);
|
|
|
}
|
|
|
// Build function that copies private values from single region to all other
|
|
|
// threads in the corresponding parallel region.
|
|
@@ -1483,15 +1511,15 @@ void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
|
|
|
CopyprivateVars, SrcExprs, DstExprs, AssignmentOps);
|
|
|
auto *BufSize = llvm::ConstantInt::get(
|
|
|
CGM.SizeTy, C.getTypeSizeInChars(CopyprivateArrayTy).getQuantity());
|
|
|
- auto *CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
|
|
|
- CGF.VoidPtrTy);
|
|
|
- auto *DidItVal =
|
|
|
- CGF.Builder.CreateAlignedLoad(DidIt, CGF.PointerAlignInBytes);
|
|
|
+ Address CL =
|
|
|
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(CopyprivateList,
|
|
|
+ CGF.VoidPtrTy);
|
|
|
+ auto *DidItVal = CGF.Builder.CreateLoad(DidIt);
|
|
|
llvm::Value *Args[] = {
|
|
|
emitUpdateLocation(CGF, Loc), // ident_t *<loc>
|
|
|
getThreadID(CGF, Loc), // i32 <gtid>
|
|
|
BufSize, // size_t <buf_size>
|
|
|
- CL, // void *<copyprivate list>
|
|
|
+ CL.getPointer(), // void *<copyprivate list>
|
|
|
CpyFn, // void (*) (void *, void *) <copy_func>
|
|
|
DidItVal // i32 did_it
|
|
|
};
|
|
@@ -1625,61 +1653,77 @@ bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
|
|
|
return Schedule != OMP_sch_static;
|
|
|
}
|
|
|
|
|
|
-void CGOpenMPRuntime::emitForInit(CodeGenFunction &CGF, SourceLocation Loc,
|
|
|
- OpenMPScheduleClauseKind ScheduleKind,
|
|
|
- unsigned IVSize, bool IVSigned, bool Ordered,
|
|
|
- llvm::Value *IL, llvm::Value *LB,
|
|
|
- llvm::Value *UB, llvm::Value *ST,
|
|
|
- llvm::Value *Chunk) {
|
|
|
+void CGOpenMPRuntime::emitForDispatchInit(CodeGenFunction &CGF,
|
|
|
+ SourceLocation Loc,
|
|
|
+ OpenMPScheduleClauseKind ScheduleKind,
|
|
|
+ unsigned IVSize, bool IVSigned,
|
|
|
+ bool Ordered, llvm::Value *UB,
|
|
|
+ llvm::Value *Chunk) {
|
|
|
OpenMPSchedType Schedule =
|
|
|
getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
|
|
|
- if (Ordered ||
|
|
|
- (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
|
|
|
- Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked)) {
|
|
|
- // Call __kmpc_dispatch_init(
|
|
|
- // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
|
|
|
- // kmp_int[32|64] lower, kmp_int[32|64] upper,
|
|
|
- // kmp_int[32|64] stride, kmp_int[32|64] chunk);
|
|
|
+ assert(Ordered ||
|
|
|
+ (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
|
|
|
+ Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked));
|
|
|
+ // Call __kmpc_dispatch_init(
|
|
|
+ // ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
|
|
|
+ // kmp_int[32|64] lower, kmp_int[32|64] upper,
|
|
|
+ // kmp_int[32|64] stride, kmp_int[32|64] chunk);
|
|
|
+
|
|
|
+ // If the Chunk was not specified in the clause - use default value 1.
|
|
|
+ if (Chunk == nullptr)
|
|
|
+ Chunk = CGF.Builder.getIntN(IVSize, 1);
|
|
|
+ llvm::Value *Args[] = {
|
|
|
+ emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
|
|
|
+ getThreadID(CGF, Loc),
|
|
|
+ CGF.Builder.getInt32(Schedule), // Schedule type
|
|
|
+ CGF.Builder.getIntN(IVSize, 0), // Lower
|
|
|
+ UB, // Upper
|
|
|
+ CGF.Builder.getIntN(IVSize, 1), // Stride
|
|
|
+ Chunk // Chunk
|
|
|
+ };
|
|
|
+ CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
|
|
|
+}
|
|
|
|
|
|
+void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
|
|
|
+ SourceLocation Loc,
|
|
|
+ OpenMPScheduleClauseKind ScheduleKind,
|
|
|
+ unsigned IVSize, bool IVSigned,
|
|
|
+ bool Ordered, Address IL, Address LB,
|
|
|
+ Address UB, Address ST,
|
|
|
+ llvm::Value *Chunk) {
|
|
|
+ OpenMPSchedType Schedule =
|
|
|
+ getRuntimeSchedule(ScheduleKind, Chunk != nullptr, Ordered);
|
|
|
+ assert(!Ordered);
|
|
|
+ assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
|
|
|
+ Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked);
|
|
|
+
|
|
|
+ // Call __kmpc_for_static_init(
|
|
|
+ // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
|
|
|
+ // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
|
|
|
+ // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
|
|
|
+ // kmp_int[32|64] incr, kmp_int[32|64] chunk);
|
|
|
+ if (Chunk == nullptr) {
|
|
|
+ assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
|
|
|
+ "expected static non-chunked schedule");
|
|
|
// If the Chunk was not specified in the clause - use default value 1.
|
|
|
- if (Chunk == nullptr)
|
|
|
Chunk = CGF.Builder.getIntN(IVSize, 1);
|
|
|
- llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
|
|
|
- getThreadID(CGF, Loc),
|
|
|
- CGF.Builder.getInt32(Schedule), // Schedule type
|
|
|
- CGF.Builder.getIntN(IVSize, 0), // Lower
|
|
|
- UB, // Upper
|
|
|
- CGF.Builder.getIntN(IVSize, 1), // Stride
|
|
|
- Chunk // Chunk
|
|
|
- };
|
|
|
- CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
|
|
|
} else {
|
|
|
- // Call __kmpc_for_static_init(
|
|
|
- // ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
|
|
|
- // kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
|
|
|
- // kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
|
|
|
- // kmp_int[32|64] incr, kmp_int[32|64] chunk);
|
|
|
- if (Chunk == nullptr) {
|
|
|
- assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static) &&
|
|
|
- "expected static non-chunked schedule");
|
|
|
- // If the Chunk was not specified in the clause - use default value 1.
|
|
|
- Chunk = CGF.Builder.getIntN(IVSize, 1);
|
|
|
- } else
|
|
|
- assert((Schedule == OMP_sch_static_chunked ||
|
|
|
- Schedule == OMP_ord_static_chunked) &&
|
|
|
- "expected static chunked schedule");
|
|
|
- llvm::Value *Args[] = { emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
|
|
|
- getThreadID(CGF, Loc),
|
|
|
- CGF.Builder.getInt32(Schedule), // Schedule type
|
|
|
- IL, // &isLastIter
|
|
|
- LB, // &LB
|
|
|
- UB, // &UB
|
|
|
- ST, // &Stride
|
|
|
- CGF.Builder.getIntN(IVSize, 1), // Incr
|
|
|
- Chunk // Chunk
|
|
|
- };
|
|
|
- CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
|
|
|
+ assert((Schedule == OMP_sch_static_chunked ||
|
|
|
+ Schedule == OMP_ord_static_chunked) &&
|
|
|
+ "expected static chunked schedule");
|
|
|
}
|
|
|
+ llvm::Value *Args[] = {
|
|
|
+ emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC),
|
|
|
+ getThreadID(CGF, Loc),
|
|
|
+ CGF.Builder.getInt32(Schedule), // Schedule type
|
|
|
+ IL.getPointer(), // &isLastIter
|
|
|
+ LB.getPointer(), // &LB
|
|
|
+ UB.getPointer(), // &UB
|
|
|
+ ST.getPointer(), // &Stride
|
|
|
+ CGF.Builder.getIntN(IVSize, 1), // Incr
|
|
|
+ Chunk // Chunk
|
|
|
+ };
|
|
|
+ CGF.EmitRuntimeCall(createForStaticInitFunction(IVSize, IVSigned), Args);
|
|
|
}
|
|
|
|
|
|
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
|
|
@@ -1703,19 +1747,19 @@ void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
|
|
|
|
|
|
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
|
|
|
SourceLocation Loc, unsigned IVSize,
|
|
|
- bool IVSigned, llvm::Value *IL,
|
|
|
- llvm::Value *LB, llvm::Value *UB,
|
|
|
- llvm::Value *ST) {
|
|
|
+ bool IVSigned, Address IL,
|
|
|
+ Address LB, Address UB,
|
|
|
+ Address ST) {
|
|
|
// Call __kmpc_dispatch_next(
|
|
|
// ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
|
|
|
// kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
|
|
|
// kmp_int[32|64] *p_stride);
|
|
|
llvm::Value *Args[] = {
|
|
|
emitUpdateLocation(CGF, Loc, OMP_IDENT_KMPC), getThreadID(CGF, Loc),
|
|
|
- IL, // &isLastIter
|
|
|
- LB, // &Lower
|
|
|
- UB, // &Upper
|
|
|
- ST // &Stride
|
|
|
+ IL.getPointer(), // &isLastIter
|
|
|
+ LB.getPointer(), // &Lower
|
|
|
+ UB.getPointer(), // &Upper
|
|
|
+ ST.getPointer() // &Stride
|
|
|
};
|
|
|
llvm::Value *Call =
|
|
|
CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
|
|
@@ -1921,10 +1965,9 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
|
|
|
// TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
|
|
|
// tt->task_data.shareds);
|
|
|
auto *GtidParam = CGF.EmitLoadOfScalar(
|
|
|
- CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false,
|
|
|
- C.getTypeAlignInChars(KmpInt32Ty).getQuantity(), KmpInt32Ty, Loc);
|
|
|
- auto *TaskTypeArgAddr = CGF.Builder.CreateAlignedLoad(
|
|
|
- CGF.GetAddrOfLocalVar(&TaskTypeArg), CGM.PointerAlignInBytes);
|
|
|
+ CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
|
|
|
+ auto *TaskTypeArgAddr = CGF.Builder.CreateLoad(
|
|
|
+ CGF.GetAddrOfLocalVar(&TaskTypeArg));
|
|
|
LValue TDBase =
|
|
|
CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskTWithPrivatesQTy);
|
|
|
auto *KmpTaskTWithPrivatesQTyRD =
|
|
@@ -1947,7 +1990,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
|
|
|
if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
|
|
|
auto PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
|
|
|
PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
- PrivatesLVal.getAddress(), CGF.VoidPtrTy);
|
|
|
+ PrivatesLVal.getPointer(), CGF.VoidPtrTy);
|
|
|
} else {
|
|
|
PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
|
|
|
}
|
|
@@ -1957,7 +2000,7 @@ emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
|
|
|
CGF.EmitCallOrInvoke(TaskFunction, CallArgs);
|
|
|
CGF.EmitStoreThroughLValue(
|
|
|
RValue::get(CGF.Builder.getInt32(/*C=*/0)),
|
|
|
- CGF.MakeNaturalAlignAddrLValue(CGF.ReturnValue, KmpInt32Ty));
|
|
|
+ CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
|
|
|
CGF.FinishFunction();
|
|
|
return TaskEntry;
|
|
|
}
|
|
@@ -1988,8 +2031,8 @@ static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
|
|
|
CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
|
|
|
Args);
|
|
|
|
|
|
- auto *TaskTypeArgAddr = CGF.Builder.CreateAlignedLoad(
|
|
|
- CGF.GetAddrOfLocalVar(&TaskTypeArg), CGM.PointerAlignInBytes);
|
|
|
+ auto *TaskTypeArgAddr = CGF.Builder.CreateLoad(
|
|
|
+ CGF.GetAddrOfLocalVar(&TaskTypeArg));
|
|
|
LValue Base =
|
|
|
CGF.MakeNaturalAlignAddrLValue(TaskTypeArgAddr, KmpTaskTWithPrivatesQTy);
|
|
|
auto *KmpTaskTWithPrivatesQTyRD =
|
|
@@ -2069,8 +2112,8 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
|
|
|
TaskPrivatesMapFnInfo, Args);
|
|
|
|
|
|
// *privi = &.privates.privi;
|
|
|
- auto *TaskPrivatesArgAddr = CGF.Builder.CreateAlignedLoad(
|
|
|
- CGF.GetAddrOfLocalVar(&TaskPrivatesArg), CGM.PointerAlignInBytes);
|
|
|
+ auto *TaskPrivatesArgAddr = CGF.Builder.CreateLoad(
|
|
|
+ CGF.GetAddrOfLocalVar(&TaskPrivatesArg));
|
|
|
LValue Base =
|
|
|
CGF.MakeNaturalAlignAddrLValue(TaskPrivatesArgAddr, PrivatesQTy);
|
|
|
auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
|
|
@@ -2078,11 +2121,10 @@ emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
|
|
|
for (auto *Field : PrivatesQTyRD->fields()) {
|
|
|
auto FieldLVal = CGF.EmitLValueForField(Base, Field);
|
|
|
auto *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
|
|
|
- auto RefLVal = CGF.MakeNaturalAlignAddrLValue(CGF.GetAddrOfLocalVar(VD),
|
|
|
- VD->getType());
|
|
|
+ auto RefLVal = CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
|
|
|
auto RefLoadRVal = CGF.EmitLoadOfLValue(RefLVal, Loc);
|
|
|
CGF.EmitStoreOfScalar(
|
|
|
- FieldLVal.getAddress(),
|
|
|
+ FieldLVal.getPointer(),
|
|
|
CGF.MakeNaturalAlignAddrLValue(RefLoadRVal.getScalarVal(),
|
|
|
RefLVal.getType()->getPointeeType()));
|
|
|
++Counter;
|
|
@@ -2120,7 +2162,7 @@ static int array_pod_sort_comparator(const PrivateDataTy *P1,
|
|
|
void CGOpenMPRuntime::emitTaskCall(
|
|
|
CodeGenFunction &CGF, SourceLocation Loc, const OMPExecutableDirective &D,
|
|
|
bool Tied, llvm::PointerIntPair<llvm::Value *, 1, bool> Final,
|
|
|
- llvm::Value *TaskFunction, QualType SharedsTy, llvm::Value *Shareds,
|
|
|
+ llvm::Value *TaskFunction, QualType SharedsTy, Address Shareds,
|
|
|
const Expr *IfCond, ArrayRef<const Expr *> PrivateVars,
|
|
|
ArrayRef<const Expr *> PrivateCopies,
|
|
|
ArrayRef<const Expr *> FirstprivateVars,
|
|
@@ -2227,12 +2269,12 @@ void CGOpenMPRuntime::emitTaskCall(
|
|
|
CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
|
|
|
// Fill the data in the resulting kmp_task_t record.
|
|
|
// Copy shareds if there are any.
|
|
|
- llvm::Value *KmpTaskSharedsPtr = nullptr;
|
|
|
+ Address KmpTaskSharedsPtr = Address::invalid();
|
|
|
if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
|
|
|
- KmpTaskSharedsPtr = CGF.EmitLoadOfScalar(
|
|
|
+ KmpTaskSharedsPtr = Address(CGF.EmitLoadOfScalar(
|
|
|
CGF.EmitLValueForField(
|
|
|
TDBase, *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
|
|
|
- Loc);
|
|
|
+ Loc), CGF.getNaturalTypeAlignment(SharedsTy));
|
|
|
CGF.EmitAggregateCopy(KmpTaskSharedsPtr, Shareds, SharedsTy);
|
|
|
}
|
|
|
// Emit initial values for private copies (if any).
|
|
@@ -2243,7 +2285,7 @@ void CGOpenMPRuntime::emitTaskCall(
|
|
|
FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
|
|
|
LValue SharedsBase;
|
|
|
if (!FirstprivateVars.empty()) {
|
|
|
- SharedsBase = CGF.MakeNaturalAlignAddrLValue(
|
|
|
+ SharedsBase = CGF.MakeAddrLValue(
|
|
|
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy)),
|
|
|
SharedsTy);
|
|
@@ -2274,10 +2316,10 @@ void CGOpenMPRuntime::emitTaskCall(
|
|
|
CGF.EmitOMPAggregateAssign(
|
|
|
PrivateLValue.getAddress(), SharedRefLValue.getAddress(),
|
|
|
Type, [&CGF, Elem, Init, &CapturesInfo](
|
|
|
- llvm::Value *DestElement, llvm::Value *SrcElement) {
|
|
|
+ Address DestElement, Address SrcElement) {
|
|
|
// Clean up any temporaries needed by the initialization.
|
|
|
CodeGenFunction::OMPPrivateScope InitScope(CGF);
|
|
|
- InitScope.addPrivate(Elem, [SrcElement]() -> llvm::Value *{
|
|
|
+ InitScope.addPrivate(Elem, [SrcElement]() -> Address {
|
|
|
return SrcElement;
|
|
|
});
|
|
|
(void)InitScope.Privatize();
|
|
@@ -2291,7 +2333,7 @@ void CGOpenMPRuntime::emitTaskCall(
|
|
|
}
|
|
|
} else {
|
|
|
CodeGenFunction::OMPPrivateScope InitScope(CGF);
|
|
|
- InitScope.addPrivate(Elem, [SharedRefLValue]() -> llvm::Value *{
|
|
|
+ InitScope.addPrivate(Elem, [SharedRefLValue]() -> Address {
|
|
|
return SharedRefLValue.getAddress();
|
|
|
});
|
|
|
(void)InitScope.Privatize();
|
|
@@ -2321,9 +2363,9 @@ void CGOpenMPRuntime::emitTaskCall(
|
|
|
Destructor);
|
|
|
|
|
|
// Process list of dependences.
|
|
|
- llvm::Value *DependInfo = nullptr;
|
|
|
- unsigned DependencesNumber = Dependences.size();
|
|
|
- if (!Dependences.empty()) {
|
|
|
+ Address DependenciesArray = Address::invalid();
|
|
|
+ unsigned NumDependencies = Dependences.size();
|
|
|
+ if (NumDependencies) {
|
|
|
// Dependence kind for RTL.
|
|
|
enum RTLDependenceKindTy { DepIn = 1, DepOut = 2, DepInOut = 3 };
|
|
|
enum RTLDependInfoFieldsTy { BaseAddr, Len, Flags };
|
|
@@ -2342,37 +2384,39 @@ void CGOpenMPRuntime::emitTaskCall(
|
|
|
} else {
|
|
|
KmpDependInfoRD = cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
|
|
|
}
|
|
|
+ CharUnits DependencySize = C.getTypeSizeInChars(KmpDependInfoTy);
|
|
|
// Define type kmp_depend_info[<Dependences.size()>];
|
|
|
QualType KmpDependInfoArrayTy = C.getConstantArrayType(
|
|
|
- KmpDependInfoTy, llvm::APInt(/*numBits=*/64, Dependences.size()),
|
|
|
+ KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies),
|
|
|
ArrayType::Normal, /*IndexTypeQuals=*/0);
|
|
|
// kmp_depend_info[<Dependences.size()>] deps;
|
|
|
- DependInfo = CGF.CreateMemTemp(KmpDependInfoArrayTy);
|
|
|
- for (unsigned i = 0; i < DependencesNumber; ++i) {
|
|
|
- auto *E = Dependences[i].second;
|
|
|
- LValue Addr = CGF.EmitLValue(E);
|
|
|
+ DependenciesArray = CGF.CreateMemTemp(KmpDependInfoArrayTy);
|
|
|
+ for (unsigned i = 0; i < NumDependencies; ++i) {
|
|
|
+ const Expr *E = Dependences[i].second;
|
|
|
+ auto Addr = CGF.EmitLValue(E);
|
|
|
llvm::Value *Size;
|
|
|
QualType Ty = E->getType();
|
|
|
- auto *DestAddr = Addr.getAddress();
|
|
|
if (auto *ASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
|
|
|
LValue UpAddrLVal =
|
|
|
CGF.EmitOMPArraySectionExpr(ASE, /*LowerBound=*/false);
|
|
|
llvm::Value *UpAddr =
|
|
|
- CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getAddress(), /*Idx0=*/1);
|
|
|
+ CGF.Builder.CreateConstGEP1_32(UpAddrLVal.getPointer(), /*Idx0=*/1);
|
|
|
llvm::Value *LowIntPtr =
|
|
|
- CGF.Builder.CreatePtrToInt(DestAddr, CGM.SizeTy);
|
|
|
+ CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGM.SizeTy);
|
|
|
llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGM.SizeTy);
|
|
|
Size = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
|
|
|
- } else
|
|
|
+ } else {
|
|
|
Size = getTypeSize(CGF, Ty);
|
|
|
- auto Base = CGF.MakeNaturalAlignAddrLValue(
|
|
|
- CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, DependInfo, i),
|
|
|
+ }
|
|
|
+ auto Base = CGF.MakeAddrLValue(
|
|
|
+ CGF.Builder.CreateConstArrayGEP(DependenciesArray, i, DependencySize),
|
|
|
KmpDependInfoTy);
|
|
|
// deps[i].base_addr = &<Dependences[i].second>;
|
|
|
auto BaseAddrLVal = CGF.EmitLValueForField(
|
|
|
Base, *std::next(KmpDependInfoRD->field_begin(), BaseAddr));
|
|
|
- CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(DestAddr, CGF.IntPtrTy),
|
|
|
- BaseAddrLVal);
|
|
|
+ CGF.EmitStoreOfScalar(
|
|
|
+ CGF.Builder.CreatePtrToInt(Addr.getPointer(), CGF.IntPtrTy),
|
|
|
+ BaseAddrLVal);
|
|
|
// deps[i].len = sizeof(<Dependences[i].second>);
|
|
|
auto LenLVal = CGF.EmitLValueForField(
|
|
|
Base, *std::next(KmpDependInfoRD->field_begin(), Len));
|
|
@@ -2397,8 +2441,8 @@ void CGOpenMPRuntime::emitTaskCall(
|
|
|
CGF.EmitStoreOfScalar(llvm::ConstantInt::get(LLVMFlagsTy, DepKind),
|
|
|
FlagsLVal);
|
|
|
}
|
|
|
- DependInfo = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
- CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, DependInfo, 0),
|
|
|
+ DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
+ CGF.Builder.CreateStructGEP(DependenciesArray, 0, CharUnits::Zero()),
|
|
|
CGF.VoidPtrTy);
|
|
|
}
|
|
|
|
|
@@ -2412,40 +2456,48 @@ void CGOpenMPRuntime::emitTaskCall(
|
|
|
// list is not empty
|
|
|
auto *ThreadID = getThreadID(CGF, Loc);
|
|
|
auto *UpLoc = emitUpdateLocation(CGF, Loc);
|
|
|
- llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask};
|
|
|
- llvm::Value *DepTaskArgs[] = {
|
|
|
- UpLoc,
|
|
|
- ThreadID,
|
|
|
- NewTask,
|
|
|
- DependInfo ? CGF.Builder.getInt32(DependencesNumber) : nullptr,
|
|
|
- DependInfo,
|
|
|
- DependInfo ? CGF.Builder.getInt32(0) : nullptr,
|
|
|
- DependInfo ? llvm::ConstantPointerNull::get(CGF.VoidPtrTy) : nullptr};
|
|
|
- auto &&ThenCodeGen = [this, DependInfo, &TaskArgs,
|
|
|
- &DepTaskArgs](CodeGenFunction &CGF) {
|
|
|
- // TODO: add check for untied tasks.
|
|
|
- CGF.EmitRuntimeCall(
|
|
|
- createRuntimeFunction(DependInfo ? OMPRTL__kmpc_omp_task_with_deps
|
|
|
- : OMPRTL__kmpc_omp_task),
|
|
|
- DependInfo ? makeArrayRef(DepTaskArgs) : makeArrayRef(TaskArgs));
|
|
|
+ llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
|
|
|
+ llvm::Value *DepTaskArgs[7];
|
|
|
+ if (NumDependencies) {
|
|
|
+ DepTaskArgs[0] = UpLoc;
|
|
|
+ DepTaskArgs[1] = ThreadID;
|
|
|
+ DepTaskArgs[2] = NewTask;
|
|
|
+ DepTaskArgs[3] = CGF.Builder.getInt32(NumDependencies);
|
|
|
+ DepTaskArgs[4] = DependenciesArray.getPointer();
|
|
|
+ DepTaskArgs[5] = CGF.Builder.getInt32(0);
|
|
|
+ DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
|
|
|
+ }
|
|
|
+ auto &&ThenCodeGen = [this, NumDependencies,
|
|
|
+ &TaskArgs, &DepTaskArgs](CodeGenFunction &CGF) {
|
|
|
+ // TODO: add check for untied tasks.
|
|
|
+ if (NumDependencies) {
|
|
|
+ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task_with_deps),
|
|
|
+ DepTaskArgs);
|
|
|
+ } else {
|
|
|
+ CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_task),
|
|
|
+ TaskArgs);
|
|
|
+ }
|
|
|
};
|
|
|
typedef CallEndCleanup<std::extent<decltype(TaskArgs)>::value>
|
|
|
IfCallEndCleanup;
|
|
|
- llvm::Value *DepWaitTaskArgs[] = {
|
|
|
- UpLoc,
|
|
|
- ThreadID,
|
|
|
- DependInfo ? CGF.Builder.getInt32(DependencesNumber) : nullptr,
|
|
|
- DependInfo,
|
|
|
- DependInfo ? CGF.Builder.getInt32(0) : nullptr,
|
|
|
- DependInfo ? llvm::ConstantPointerNull::get(CGF.VoidPtrTy) : nullptr};
|
|
|
+
|
|
|
+ llvm::Value *DepWaitTaskArgs[6];
|
|
|
+ if (NumDependencies) {
|
|
|
+ DepWaitTaskArgs[0] = UpLoc;
|
|
|
+ DepWaitTaskArgs[1] = ThreadID;
|
|
|
+ DepWaitTaskArgs[2] = CGF.Builder.getInt32(NumDependencies);
|
|
|
+ DepWaitTaskArgs[3] = DependenciesArray.getPointer();
|
|
|
+ DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
|
|
|
+ DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
|
|
|
+ }
|
|
|
auto &&ElseCodeGen = [this, &TaskArgs, ThreadID, NewTaskNewTaskTTy, TaskEntry,
|
|
|
- DependInfo, &DepWaitTaskArgs](CodeGenFunction &CGF) {
|
|
|
+ NumDependencies, &DepWaitTaskArgs](CodeGenFunction &CGF) {
|
|
|
CodeGenFunction::RunCleanupsScope LocalScope(CGF);
|
|
|
// Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
|
|
|
// kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
|
|
|
// ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
|
|
|
// is specified.
|
|
|
- if (DependInfo)
|
|
|
+ if (NumDependencies)
|
|
|
CGF.EmitRuntimeCall(createRuntimeFunction(OMPRTL__kmpc_omp_wait_deps),
|
|
|
DepWaitTaskArgs);
|
|
|
// Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
|
|
@@ -2463,6 +2515,7 @@ void CGOpenMPRuntime::emitTaskCall(
|
|
|
llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
|
|
|
CGF.EmitCallOrInvoke(TaskEntry, OutlinedFnArgs);
|
|
|
};
|
|
|
+
|
|
|
if (IfCond) {
|
|
|
emitOMPIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
|
|
|
} else {
|
|
@@ -2498,38 +2551,26 @@ static llvm::Value *emitReductionFunction(CodeGenModule &CGM,
|
|
|
|
|
|
// Dst = (void*[n])(LHSArg);
|
|
|
// Src = (void*[n])(RHSArg);
|
|
|
- auto *LHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
- CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&LHSArg),
|
|
|
- CGF.PointerAlignInBytes),
|
|
|
- ArgsType);
|
|
|
- auto *RHS = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
- CGF.Builder.CreateAlignedLoad(CGF.GetAddrOfLocalVar(&RHSArg),
|
|
|
- CGF.PointerAlignInBytes),
|
|
|
- ArgsType);
|
|
|
+ Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
+ CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
|
|
|
+ ArgsType), CGF.getPointerAlign());
|
|
|
+ Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
+ CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
|
|
|
+ ArgsType), CGF.getPointerAlign());
|
|
|
|
|
|
// ...
|
|
|
// *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
|
|
|
// ...
|
|
|
CodeGenFunction::OMPPrivateScope Scope(CGF);
|
|
|
for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I) {
|
|
|
- Scope.addPrivate(
|
|
|
- cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl()),
|
|
|
- [&]() -> llvm::Value *{
|
|
|
- return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
- CGF.Builder.CreateAlignedLoad(
|
|
|
- CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, RHS, I),
|
|
|
- CGM.PointerAlignInBytes),
|
|
|
- CGF.ConvertTypeForMem(C.getPointerType(RHSExprs[I]->getType())));
|
|
|
- });
|
|
|
- Scope.addPrivate(
|
|
|
- cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl()),
|
|
|
- [&]() -> llvm::Value *{
|
|
|
- return CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
- CGF.Builder.CreateAlignedLoad(
|
|
|
- CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, LHS, I),
|
|
|
- CGM.PointerAlignInBytes),
|
|
|
- CGF.ConvertTypeForMem(C.getPointerType(LHSExprs[I]->getType())));
|
|
|
- });
|
|
|
+ auto RHSVar = cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
|
|
|
+ Scope.addPrivate(RHSVar, [&]() -> Address {
|
|
|
+ return emitAddrOfVarFromArray(CGF, RHS, I, RHSVar);
|
|
|
+ });
|
|
|
+ auto LHSVar = cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
|
|
|
+ Scope.addPrivate(LHSVar, [&]() -> Address {
|
|
|
+ return emitAddrOfVarFromArray(CGF, LHS, I, LHSVar);
|
|
|
+ });
|
|
|
}
|
|
|
Scope.Privatize();
|
|
|
for (auto *E : ReductionOps) {
|
|
@@ -2596,14 +2637,15 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
|
|
|
QualType ReductionArrayTy =
|
|
|
C.getConstantArrayType(C.VoidPtrTy, ArraySize, ArrayType::Normal,
|
|
|
/*IndexTypeQuals=*/0);
|
|
|
- auto *ReductionList =
|
|
|
+ Address ReductionList =
|
|
|
CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
|
|
|
for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I) {
|
|
|
- auto *Elem = CGF.Builder.CreateStructGEP(/*Ty=*/nullptr, ReductionList, I);
|
|
|
- CGF.Builder.CreateAlignedStore(
|
|
|
+ Address Elem =
|
|
|
+ CGF.Builder.CreateConstArrayGEP(ReductionList, I, CGF.getPointerSize());
|
|
|
+ CGF.Builder.CreateStore(
|
|
|
CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
|
|
|
- CGF.EmitLValue(RHSExprs[I]).getAddress(), CGF.VoidPtrTy),
|
|
|
- Elem, CGM.PointerAlignInBytes);
|
|
|
+ CGF.EmitLValue(RHSExprs[I]).getPointer(), CGF.VoidPtrTy),
|
|
|
+ Elem);
|
|
|
}
|
|
|
|
|
|
// 2. Emit reduce_func().
|
|
@@ -2622,8 +2664,9 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
|
|
|
auto *ThreadId = getThreadID(CGF, Loc);
|
|
|
auto *ReductionArrayTySize = llvm::ConstantInt::get(
|
|
|
CGM.SizeTy, C.getTypeSizeInChars(ReductionArrayTy).getQuantity());
|
|
|
- auto *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList,
|
|
|
- CGF.VoidPtrTy);
|
|
|
+ auto *RL =
|
|
|
+ CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(ReductionList.getPointer(),
|
|
|
+ CGF.VoidPtrTy);
|
|
|
llvm::Value *Args[] = {
|
|
|
IdentTLoc, // ident_t *<loc>
|
|
|
ThreadId, // i32 <gtid>
|
|
@@ -2736,11 +2779,11 @@ void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
|
|
|
[&CGF, UpExpr, VD](RValue XRValue) {
|
|
|
CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
|
|
|
PrivateScope.addPrivate(
|
|
|
- VD, [&CGF, VD, XRValue]() -> llvm::Value *{
|
|
|
- auto *LHSTemp = CGF.CreateMemTemp(VD->getType());
|
|
|
+ VD, [&CGF, VD, XRValue]() -> Address {
|
|
|
+ Address LHSTemp = CGF.CreateMemTemp(VD->getType());
|
|
|
CGF.EmitStoreThroughLValue(
|
|
|
XRValue,
|
|
|
- CGF.MakeNaturalAlignAddrLValue(LHSTemp, VD->getType()));
|
|
|
+ CGF.MakeAddrLValue(LHSTemp, VD->getType()));
|
|
|
return LHSTemp;
|
|
|
});
|
|
|
(void)PrivateScope.Privatize();
|