瀏覽代碼

[hip][cuda] Fix the extended lambda name mangling issue.

Summary:
- HIP/CUDA host side needs to use device kernel symbol name to match the
  device side binaries. Without a consistent naming between host- and
  device-side compilations, it's risky that wrong device binaries are
  executed. Consistent naming is usually not an issue until unnamed
  types are used, especially the lambda. In this patch, the consistent
  name mangling is addressed for the extended lambdas, i.e. the lambdas
  annotated with `__device__`.
- In [Itanium C++ ABI][1], the mangling of the lambda is generally
  unspecified unless, in certain cases, ODR rule is required to ensure
  consisent naming cross TUs. The extended lambda is such a case as its
  name may be part of a device kernel function, e.g., the extended
  lambda is used as a template argument and etc. Thus, we need to force
  ODR for extended lambdas as they are referenced in both device- and
  host-side TUs. Furthermore, if a extended lambda is nested in other
  (extended or not) lambdas, those lambdas are required to follow ODR
  naming as well. This patch revises the current lambda mangle numbering
  to force ODR from an extended lambda to all its parent lambdas.
- On the other side, the aforementioned ODR naming should not change
  those lambdas' original linkages, i.e., we cannot replace the original
  `internal` with `linkonce_odr`; otherwise, we may violate ODR in
  general. This patch introduces a new field `HasKnownInternalLinkage`
  in lambda data to decouple the current linkage calculation based on
  mangling number assigned.

[1]: https://itanium-cxx-abi.github.io/cxx-abi/abi.html

Reviewers: tra, rsmith, yaxunl, martong, shafik

Subscribers: cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D68818

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@375309 91177308-0d34-0410-b5e6-96231b3b80d8
Michael Liao 5 年之前
父節點
當前提交
3e0834ac1f

+ 21 - 8
include/clang/AST/DeclCXX.h

@@ -389,9 +389,12 @@ class CXXRecordDecl : public RecordDecl {
     /// The number of explicit captures in this lambda.
     /// The number of explicit captures in this lambda.
     unsigned NumExplicitCaptures : 13;
     unsigned NumExplicitCaptures : 13;
 
 
+    /// Has known `internal` linkage.
+    unsigned HasKnownInternalLinkage : 1;
+
     /// The number used to indicate this lambda expression for name
     /// The number used to indicate this lambda expression for name
     /// mangling in the Itanium C++ ABI.
     /// mangling in the Itanium C++ ABI.
-    unsigned ManglingNumber = 0;
+    unsigned ManglingNumber : 31;
 
 
     /// The declaration that provides context for this lambda, if the
     /// The declaration that provides context for this lambda, if the
     /// actual DeclContext does not suffice. This is used for lambdas that
     /// actual DeclContext does not suffice. This is used for lambdas that
@@ -406,12 +409,12 @@ class CXXRecordDecl : public RecordDecl {
     /// The type of the call method.
     /// The type of the call method.
     TypeSourceInfo *MethodTyInfo;
     TypeSourceInfo *MethodTyInfo;
 
 
-    LambdaDefinitionData(CXXRecordDecl *D, TypeSourceInfo *Info,
-                         bool Dependent, bool IsGeneric,
-                         LambdaCaptureDefault CaptureDefault)
-      : DefinitionData(D), Dependent(Dependent), IsGenericLambda(IsGeneric),
-        CaptureDefault(CaptureDefault), NumCaptures(0), NumExplicitCaptures(0),
-        MethodTyInfo(Info) {
+    LambdaDefinitionData(CXXRecordDecl *D, TypeSourceInfo *Info, bool Dependent,
+                         bool IsGeneric, LambdaCaptureDefault CaptureDefault)
+        : DefinitionData(D), Dependent(Dependent), IsGenericLambda(IsGeneric),
+          CaptureDefault(CaptureDefault), NumCaptures(0),
+          NumExplicitCaptures(0), HasKnownInternalLinkage(0), ManglingNumber(0),
+          MethodTyInfo(Info) {
       IsLambda = true;
       IsLambda = true;
 
 
       // C++1z [expr.prim.lambda]p4:
       // C++1z [expr.prim.lambda]p4:
@@ -1705,6 +1708,13 @@ public:
     return getLambdaData().ManglingNumber;
     return getLambdaData().ManglingNumber;
   }
   }
 
 
+  /// The lambda is known to has internal linkage no matter whether it has name
+  /// mangling number.
+  bool hasKnownLambdaInternalLinkage() const {
+    assert(isLambda() && "Not a lambda closure type!");
+    return getLambdaData().HasKnownInternalLinkage;
+  }
+
   /// Retrieve the declaration that provides additional context for a
   /// Retrieve the declaration that provides additional context for a
   /// lambda, when the normal declaration context is not specific enough.
   /// lambda, when the normal declaration context is not specific enough.
   ///
   ///
@@ -1718,9 +1728,12 @@ public:
 
 
   /// Set the mangling number and context declaration for a lambda
   /// Set the mangling number and context declaration for a lambda
   /// class.
   /// class.
-  void setLambdaMangling(unsigned ManglingNumber, Decl *ContextDecl) {
+  void setLambdaMangling(unsigned ManglingNumber, Decl *ContextDecl,
+                         bool HasKnownInternalLinkage = false) {
+    assert(isLambda() && "Not a lambda closure type!");
     getLambdaData().ManglingNumber = ManglingNumber;
     getLambdaData().ManglingNumber = ManglingNumber;
     getLambdaData().ContextDecl = ContextDecl;
     getLambdaData().ContextDecl = ContextDecl;
+    getLambdaData().HasKnownInternalLinkage = HasKnownInternalLinkage;
   }
   }
 
 
   /// Returns the inheritance model used for this record.
   /// Returns the inheritance model used for this record.

+ 11 - 6
include/clang/Sema/Sema.h

@@ -5926,12 +5926,17 @@ public:
                                          LambdaCaptureDefault CaptureDefault);
                                          LambdaCaptureDefault CaptureDefault);
 
 
   /// Start the definition of a lambda expression.
   /// Start the definition of a lambda expression.
-  CXXMethodDecl *
-  startLambdaDefinition(CXXRecordDecl *Class, SourceRange IntroducerRange,
-                        TypeSourceInfo *MethodType, SourceLocation EndLoc,
-                        ArrayRef<ParmVarDecl *> Params,
-                        ConstexprSpecKind ConstexprKind,
-                        Optional<std::pair<unsigned, Decl *>> Mangling = None);
+  CXXMethodDecl *startLambdaDefinition(CXXRecordDecl *Class,
+                                       SourceRange IntroducerRange,
+                                       TypeSourceInfo *MethodType,
+                                       SourceLocation EndLoc,
+                                       ArrayRef<ParmVarDecl *> Params,
+                                       ConstexprSpecKind ConstexprKind);
+
+  /// Number lambda for linkage purposes if necessary.
+  void handleLambdaNumbering(
+      CXXRecordDecl *Class, CXXMethodDecl *Method,
+      Optional<std::tuple<unsigned, bool, Decl *>> Mangling = None);
 
 
   /// Endow the lambda scope info with the relevant properties.
   /// Endow the lambda scope info with the relevant properties.
   void buildLambdaScope(sema::LambdaScopeInfo *LSI,
   void buildLambdaScope(sema::LambdaScopeInfo *LSI,

+ 2 - 1
lib/AST/ASTImporter.cpp

@@ -2694,7 +2694,8 @@ ExpectedDecl ASTNodeImporter::VisitRecordDecl(RecordDecl *D) {
       ExpectedDecl CDeclOrErr = import(DCXX->getLambdaContextDecl());
       ExpectedDecl CDeclOrErr = import(DCXX->getLambdaContextDecl());
       if (!CDeclOrErr)
       if (!CDeclOrErr)
         return CDeclOrErr.takeError();
         return CDeclOrErr.takeError();
-      D2CXX->setLambdaMangling(DCXX->getLambdaManglingNumber(), *CDeclOrErr);
+      D2CXX->setLambdaMangling(DCXX->getLambdaManglingNumber(), *CDeclOrErr,
+                               DCXX->hasKnownLambdaInternalLinkage());
     } else if (DCXX->isInjectedClassName()) {
     } else if (DCXX->isInjectedClassName()) {
       // We have to be careful to do a similar dance to the one in
       // We have to be careful to do a similar dance to the one in
       // Sema::ActOnStartCXXMemberDeclarations
       // Sema::ActOnStartCXXMemberDeclarations

+ 4 - 2
lib/AST/Decl.cpp

@@ -1385,7 +1385,8 @@ LinkageInfo LinkageComputer::computeLVForDecl(const NamedDecl *D,
     case Decl::CXXRecord: {
     case Decl::CXXRecord: {
       const auto *Record = cast<CXXRecordDecl>(D);
       const auto *Record = cast<CXXRecordDecl>(D);
       if (Record->isLambda()) {
       if (Record->isLambda()) {
-        if (!Record->getLambdaManglingNumber()) {
+        if (Record->hasKnownLambdaInternalLinkage() ||
+            !Record->getLambdaManglingNumber()) {
           // This lambda has no mangling number, so it's internal.
           // This lambda has no mangling number, so it's internal.
           return getInternalLinkageFor(D);
           return getInternalLinkageFor(D);
         }
         }
@@ -1402,7 +1403,8 @@ LinkageInfo LinkageComputer::computeLVForDecl(const NamedDecl *D,
         //  };
         //  };
         const CXXRecordDecl *OuterMostLambda =
         const CXXRecordDecl *OuterMostLambda =
             getOutermostEnclosingLambda(Record);
             getOutermostEnclosingLambda(Record);
-        if (!OuterMostLambda->getLambdaManglingNumber())
+        if (OuterMostLambda->hasKnownLambdaInternalLinkage() ||
+            !OuterMostLambda->getLambdaManglingNumber())
           return getInternalLinkageFor(D);
           return getInternalLinkageFor(D);
 
 
         return getLVForClosure(
         return getLVForClosure(

+ 56 - 17
lib/Sema/SemaLambda.cpp

@@ -335,7 +335,7 @@ Sema::getCurrentMangleNumberContext(const DeclContext *DC) {
   case StaticDataMember:
   case StaticDataMember:
     //  -- the initializers of nonspecialized static members of template classes
     //  -- the initializers of nonspecialized static members of template classes
     if (!IsInNonspecializedTemplate)
     if (!IsInNonspecializedTemplate)
-      return std::make_tuple(nullptr, nullptr);
+      return std::make_tuple(nullptr, ManglingContextDecl);
     // Fall through to get the current context.
     // Fall through to get the current context.
     LLVM_FALLTHROUGH;
     LLVM_FALLTHROUGH;
 
 
@@ -356,14 +356,15 @@ Sema::getCurrentMangleNumberContext(const DeclContext *DC) {
   llvm_unreachable("unexpected context");
   llvm_unreachable("unexpected context");
 }
 }
 
 
-CXXMethodDecl *Sema::startLambdaDefinition(
-    CXXRecordDecl *Class, SourceRange IntroducerRange,
-    TypeSourceInfo *MethodTypeInfo, SourceLocation EndLoc,
-    ArrayRef<ParmVarDecl *> Params, ConstexprSpecKind ConstexprKind,
-    Optional<std::pair<unsigned, Decl *>> Mangling) {
+CXXMethodDecl *Sema::startLambdaDefinition(CXXRecordDecl *Class,
+                                           SourceRange IntroducerRange,
+                                           TypeSourceInfo *MethodTypeInfo,
+                                           SourceLocation EndLoc,
+                                           ArrayRef<ParmVarDecl *> Params,
+                                           ConstexprSpecKind ConstexprKind) {
   QualType MethodType = MethodTypeInfo->getType();
   QualType MethodType = MethodTypeInfo->getType();
   TemplateParameterList *TemplateParams =
   TemplateParameterList *TemplateParams =
-            getGenericLambdaTemplateParameterList(getCurLambda(), *this);
+      getGenericLambdaTemplateParameterList(getCurLambda(), *this);
   // If a lambda appears in a dependent context or is a generic lambda (has
   // If a lambda appears in a dependent context or is a generic lambda (has
   // template parameters) and has an 'auto' return type, deduce it to a
   // template parameters) and has an 'auto' return type, deduce it to a
   // dependent type.
   // dependent type.
@@ -425,20 +426,55 @@ CXXMethodDecl *Sema::startLambdaDefinition(
       P->setOwningFunction(Method);
       P->setOwningFunction(Method);
   }
   }
 
 
+  return Method;
+}
+
+void Sema::handleLambdaNumbering(
+    CXXRecordDecl *Class, CXXMethodDecl *Method,
+    Optional<std::tuple<unsigned, bool, Decl *>> Mangling) {
   if (Mangling) {
   if (Mangling) {
-    Class->setLambdaMangling(Mangling->first, Mangling->second);
-  } else {
-    MangleNumberingContext *MCtx;
+    unsigned ManglingNumber;
+    bool HasKnownInternalLinkage;
     Decl *ManglingContextDecl;
     Decl *ManglingContextDecl;
-    std::tie(MCtx, ManglingContextDecl) =
-        getCurrentMangleNumberContext(Class->getDeclContext());
-    if (MCtx) {
-      unsigned ManglingNumber = MCtx->getManglingNumber(Method);
-      Class->setLambdaMangling(ManglingNumber, ManglingContextDecl);
-    }
+    std::tie(ManglingNumber, HasKnownInternalLinkage, ManglingContextDecl) =
+        Mangling.getValue();
+    Class->setLambdaMangling(ManglingNumber, ManglingContextDecl,
+                             HasKnownInternalLinkage);
+    return;
   }
   }
 
 
-  return Method;
+  auto getMangleNumberingContext =
+      [this](CXXRecordDecl *Class, Decl *ManglingContextDecl) -> MangleNumberingContext * {
+    // Get mangle numbering context if there's any extra decl context.
+    if (ManglingContextDecl)
+      return &Context.getManglingNumberContext(
+          ASTContext::NeedExtraManglingDecl, ManglingContextDecl);
+    // Otherwise, from that lambda's decl context.
+    auto DC = Class->getDeclContext();
+    while (auto *CD = dyn_cast<CapturedDecl>(DC))
+      DC = CD->getParent();
+    return &Context.getManglingNumberContext(DC);
+  };
+
+  MangleNumberingContext *MCtx;
+  Decl *ManglingContextDecl;
+  std::tie(MCtx, ManglingContextDecl) =
+      getCurrentMangleNumberContext(Class->getDeclContext());
+  bool HasKnownInternalLinkage = false;
+  if (!MCtx && getLangOpts().CUDA) {
+    // Force lambda numbering in CUDA/HIP as we need to name lambdas following
+    // ODR. Both device- and host-compilation need to have a consistent naming
+    // on kernel functions. As lambdas are potential part of these `__global__`
+    // function names, they needs numbering following ODR.
+    MCtx = getMangleNumberingContext(Class, ManglingContextDecl);
+    assert(MCtx && "Retrieving mangle numbering context failed!");
+    HasKnownInternalLinkage = true;
+  }
+  if (MCtx) {
+    unsigned ManglingNumber = MCtx->getManglingNumber(Method);
+    Class->setLambdaMangling(ManglingNumber, ManglingContextDecl,
+                             HasKnownInternalLinkage);
+  }
 }
 }
 
 
 void Sema::buildLambdaScope(LambdaScopeInfo *LSI,
 void Sema::buildLambdaScope(LambdaScopeInfo *LSI,
@@ -951,6 +987,9 @@ void Sema::ActOnStartOfLambdaDefinition(LambdaIntroducer &Intro,
   if (getLangOpts().CUDA)
   if (getLangOpts().CUDA)
     CUDASetLambdaAttrs(Method);
     CUDASetLambdaAttrs(Method);
 
 
+  // Number the lambda for linkage purposes if necessary.
+  handleLambdaNumbering(Class, Method);
+
   // Introduce the function call operator as the current declaration context.
   // Introduce the function call operator as the current declaration context.
   PushDeclContext(CurScope, Method);
   PushDeclContext(CurScope, Method);
 
 

+ 8 - 4
lib/Sema/TreeTransform.h

@@ -11497,17 +11497,18 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) {
                                         E->getCaptureDefault());
                                         E->getCaptureDefault());
   getDerived().transformedLocalDecl(OldClass, {Class});
   getDerived().transformedLocalDecl(OldClass, {Class});
 
 
-  Optional<std::pair<unsigned, Decl*>> Mangling;
+  Optional<std::tuple<unsigned, bool, Decl *>> Mangling;
   if (getDerived().ReplacingOriginal())
   if (getDerived().ReplacingOriginal())
-    Mangling = std::make_pair(OldClass->getLambdaManglingNumber(),
-                              OldClass->getLambdaContextDecl());
+    Mangling = std::make_tuple(OldClass->getLambdaManglingNumber(),
+                               OldClass->hasKnownLambdaInternalLinkage(),
+                               OldClass->getLambdaContextDecl());
 
 
   // Build the call operator.
   // Build the call operator.
   CXXMethodDecl *NewCallOperator = getSema().startLambdaDefinition(
   CXXMethodDecl *NewCallOperator = getSema().startLambdaDefinition(
       Class, E->getIntroducerRange(), NewCallOpTSI,
       Class, E->getIntroducerRange(), NewCallOpTSI,
       E->getCallOperator()->getEndLoc(),
       E->getCallOperator()->getEndLoc(),
       NewCallOpTSI->getTypeLoc().castAs<FunctionProtoTypeLoc>().getParams(),
       NewCallOpTSI->getTypeLoc().castAs<FunctionProtoTypeLoc>().getParams(),
-      E->getCallOperator()->getConstexprKind(), Mangling);
+      E->getCallOperator()->getConstexprKind());
 
 
   LSI->CallOperator = NewCallOperator;
   LSI->CallOperator = NewCallOperator;
 
 
@@ -11527,6 +11528,9 @@ TreeTransform<Derived>::TransformLambdaExpr(LambdaExpr *E) {
   getDerived().transformAttrs(E->getCallOperator(), NewCallOperator);
   getDerived().transformAttrs(E->getCallOperator(), NewCallOperator);
   getDerived().transformedLocalDecl(E->getCallOperator(), {NewCallOperator});
   getDerived().transformedLocalDecl(E->getCallOperator(), {NewCallOperator});
 
 
+  // Number the lambda for linkage purposes if necessary.
+  getSema().handleLambdaNumbering(Class, NewCallOperator, Mangling);
+
   // Introduce the context of the call operator.
   // Introduce the context of the call operator.
   Sema::ContextRAII SavedContext(getSema(), NewCallOperator,
   Sema::ContextRAII SavedContext(getSema(), NewCallOperator,
                                  /*NewThisContext*/false);
                                  /*NewThisContext*/false);

+ 1 - 0
lib/Serialization/ASTReaderDecl.cpp

@@ -1690,6 +1690,7 @@ void ASTDeclReader::ReadCXXDefinitionData(
     Lambda.CaptureDefault = Record.readInt();
     Lambda.CaptureDefault = Record.readInt();
     Lambda.NumCaptures = Record.readInt();
     Lambda.NumCaptures = Record.readInt();
     Lambda.NumExplicitCaptures = Record.readInt();
     Lambda.NumExplicitCaptures = Record.readInt();
+    Lambda.HasKnownInternalLinkage = Record.readInt();
     Lambda.ManglingNumber = Record.readInt();
     Lambda.ManglingNumber = Record.readInt();
     Lambda.ContextDecl = ReadDeclID();
     Lambda.ContextDecl = ReadDeclID();
     Lambda.Captures = (Capture *)Reader.getContext().Allocate(
     Lambda.Captures = (Capture *)Reader.getContext().Allocate(

+ 1 - 0
lib/Serialization/ASTWriter.cpp

@@ -6224,6 +6224,7 @@ void ASTRecordWriter::AddCXXDefinitionData(const CXXRecordDecl *D) {
     Record->push_back(Lambda.CaptureDefault);
     Record->push_back(Lambda.CaptureDefault);
     Record->push_back(Lambda.NumCaptures);
     Record->push_back(Lambda.NumCaptures);
     Record->push_back(Lambda.NumExplicitCaptures);
     Record->push_back(Lambda.NumExplicitCaptures);
+    Record->push_back(Lambda.HasKnownInternalLinkage);
     Record->push_back(Lambda.ManglingNumber);
     Record->push_back(Lambda.ManglingNumber);
     AddDeclRef(D->getLambdaContextDecl());
     AddDeclRef(D->getLambdaContextDecl());
     AddTypeSourceInfo(Lambda.MethodTyInfo);
     AddTypeSourceInfo(Lambda.MethodTyInfo);

+ 39 - 0
test/CodeGenCUDA/unnamed-types.cu

@@ -0,0 +1,39 @@
+// RUN: %clang_cc1 -std=c++11 -x hip -triple x86_64-linux-gnu -aux-triple amdgcn-amd-amdhsa -emit-llvm %s -o - | FileCheck %s --check-prefix=HOST
+// RUN: %clang_cc1 -std=c++11 -x hip -triple amdgcn-amd-amdhsa -fcuda-is-device -emit-llvm %s -o - | FileCheck %s --check-prefix=DEVICE
+
+#include "Inputs/cuda.h"
+
+// HOST: @0 = private unnamed_addr constant [43 x i8] c"_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_\00", align 1
+
+__device__ float d0(float x) {
+  return [](float x) { return x + 2.f; }(x);
+}
+
+__device__ float d1(float x) {
+  return [](float x) { return x * 2.f; }(x);
+}
+
+// DEVICE: amdgpu_kernel void @_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_(
+template <typename F>
+__global__ void k0(float *p, F f) {
+  p[0] = f(p[0]) + d0(p[1]) + d1(p[2]);
+}
+
+void f0(float *p) {
+  [](float *p) {
+    *p = 1.f;
+  }(p);
+}
+
+// The inner/outer lambdas are required to be mangled following ODR but their
+// linkages are still required to keep the original `internal` linkage.
+
+// HOST: define internal void @_ZZ2f1PfENKUlS_E_clES_(
+// DEVICE: define internal float @_ZZZ2f1PfENKUlS_E_clES_ENKUlfE_clEf(
+void f1(float *p) {
+  [](float *p) {
+    k0<<<1,1>>>(p, [] __device__ (float x) { return x + 1.f; });
+  }(p);
+}
+// HOST: @__hip_register_globals
+// HOST: __hipRegisterFunction{{.*}}@_Z2k0IZZ2f1PfENKUlS0_E_clES0_EUlfE_EvS0_T_{{.*}}@0