|
@@ -2671,9 +2671,18 @@ llvm::GlobalValue::LinkageTypes CodeGenModule::getLLVMLinkageForDeclarator(
|
|
|
// explicit instantiations can occur in multiple translation units
|
|
|
// and must all be equivalent. However, we are not allowed to
|
|
|
// throw away these explicit instantiations.
|
|
|
- if (Linkage == GVA_StrongODR)
|
|
|
- return !Context.getLangOpts().AppleKext ? llvm::Function::WeakODRLinkage
|
|
|
- : llvm::Function::ExternalLinkage;
|
|
|
+ //
|
|
|
+ // We don't currently support CUDA device code spread out across multiple TUs,
|
|
|
+ // so say that CUDA templates are either external (for kernels) or internal.
|
|
|
+ // This lets llvm perform aggressive inter-procedural optimizations.
|
|
|
+ if (Linkage == GVA_StrongODR) {
|
|
|
+ if (Context.getLangOpts().AppleKext)
|
|
|
+ return llvm::Function::ExternalLinkage;
|
|
|
+ if (Context.getLangOpts().CUDA && Context.getLangOpts().CUDAIsDevice)
|
|
|
+ return D->hasAttr<CUDAGlobalAttr>() ? llvm::Function::ExternalLinkage
|
|
|
+ : llvm::Function::InternalLinkage;
|
|
|
+ return llvm::Function::WeakODRLinkage;
|
|
|
+ }
|
|
|
|
|
|
// C++ doesn't have tentative definitions and thus cannot have common
|
|
|
// linkage.
|