@@ -499,27 +499,84 @@ Sema::CUDADiagBuilder::CUDADiagBuilder(Kind K, SourceLocation Loc,
     break;
   case K_Deferred:
     assert(Fn && "Must have a function to attach the deferred diag to.");
-    PartialDiagInfo.emplace(Loc, S.PDiag(DiagID), Fn);
+    PartialDiagInfo.emplace(S, Loc, S.PDiag(DiagID), Fn);
     break;
   }
 }
 
+// In CUDA, there are some constructs which may appear in semantically-valid
+// code, but trigger errors if we ever generate code for the function in which
+// they appear. Essentially every construct you're not allowed to use on the
+// device falls into this category, because you are allowed to use these
+// constructs in a __host__ __device__ function, but only if that function is
+// never codegen'ed on the device.
+//
+// To handle semantic checking for these constructs, we keep track of the set of
+// functions we know will be emitted, either because we could tell a priori that
+// they would be emitted, or because they were transitively called by a
+// known-emitted function.
+//
+// We also keep a partial call graph of which not-known-emitted functions call
+// which other not-known-emitted functions.
+//
+// When we see something which is illegal if the current function is emitted
+// (usually by way of CUDADiagIfDeviceCode, CUDADiagIfHostCode, or
+// CheckCUDACall), we first check if the current function is known-emitted. If
+// so, we immediately output the diagnostic.
+//
+// Otherwise, we "defer" the diagnostic. It sits in Sema::CUDADeferredDiags
+// until we discover that the function is known-emitted, at which point we take
+// it out of this map and emit the diagnostic.
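+//
+// For example (an illustrative sketch, not part of the original patch): in
+// device compilation mode, given
+//
+//   __host__ __device__ void hd() { throw 0; }  // 'throw' is illegal on device
+//   __global__ void kernel() { hd(); }
+//
+// the error on 'throw' is deferred when hd() is parsed, because hd() might
+// never be codegen'ed for device. Once kernel() -- which is known-emitted --
+// is seen to call hd(), hd() becomes known-emitted too and the error is
+// emitted.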
+
+// Do we know that we will eventually codegen the given function?
+static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) {
+  // Templates are emitted when they're instantiated.
+  if (FD->isDependentContext())
+    return false;
+
+  // When compiling for device, host functions are never emitted. Similarly,
+  // when compiling for host, device and global functions are never emitted.
+  // (Technically, we do emit a host-side stub for global functions, but this
+  // doesn't count for our purposes here.)
+  Sema::CUDAFunctionTarget T = S.IdentifyCUDATarget(FD);
+  if (S.getLangOpts().CUDAIsDevice && T == Sema::CFT_Host)
+    return false;
+  if (!S.getLangOpts().CUDAIsDevice &&
+      (T == Sema::CFT_Device || T == Sema::CFT_Global))
+    return false;
+
+  // Externally-visible and similar functions are always emitted.
+  if (S.getASTContext().GetGVALinkageForFunction(FD) > GVA_DiscardableODR)
+    return true;
+
+  // Otherwise, the function is known-emitted if it's in our set of
+  // known-emitted functions.
+  return S.CUDAKnownEmittedFns.count(FD) > 0;
+}
+
 Sema::CUDADiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
                                                  unsigned DiagID) {
   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
-  CUDADiagBuilder::Kind DiagKind;
-  switch (CurrentCUDATarget()) {
-  case CFT_Global:
-  case CFT_Device:
-    DiagKind = CUDADiagBuilder::K_Immediate;
-    break;
-  case CFT_HostDevice:
-    DiagKind = getLangOpts().CUDAIsDevice ? CUDADiagBuilder::K_Deferred
-                                          : CUDADiagBuilder::K_Nop;
-    break;
-  default:
-    DiagKind = CUDADiagBuilder::K_Nop;
-  }
+  CUDADiagBuilder::Kind DiagKind = [&] {
+    switch (CurrentCUDATarget()) {
+    case CFT_Global:
+    case CFT_Device:
+      return CUDADiagBuilder::K_Immediate;
+    case CFT_HostDevice:
+      // An HD function counts as host code if we're compiling for host, and
+      // device code if we're compiling for device. Defer any errors in device
+      // mode until the function is known-emitted.
+      if (getLangOpts().CUDAIsDevice) {
+        return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
+                   ? CUDADiagBuilder::K_Immediate
+                   : CUDADiagBuilder::K_Deferred;
+      }
+      return CUDADiagBuilder::K_Nop;
+
+    default:
+      return CUDADiagBuilder::K_Nop;
+    }
+  }();
   return CUDADiagBuilder(DiagKind, Loc, DiagID,
                          dyn_cast<FunctionDecl>(CurContext), *this);
 }
@@ -527,41 +584,119 @@ Sema::CUDADiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
 Sema::CUDADiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
                                                unsigned DiagID) {
   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
-  CUDADiagBuilder::Kind DiagKind;
-  switch (CurrentCUDATarget()) {
-  case CFT_Host:
-    DiagKind = CUDADiagBuilder::K_Immediate;
-    break;
-  case CFT_HostDevice:
-    DiagKind = getLangOpts().CUDAIsDevice ? CUDADiagBuilder::K_Nop
-                                          : CUDADiagBuilder::K_Deferred;
-    break;
-  default:
-    DiagKind = CUDADiagBuilder::K_Nop;
-  }
+  CUDADiagBuilder::Kind DiagKind = [&] {
+    switch (CurrentCUDATarget()) {
+    case CFT_Host:
+      return CUDADiagBuilder::K_Immediate;
+    case CFT_HostDevice:
+      // An HD function counts as host code if we're compiling for host, and
+      // device code if we're compiling for device. Defer any errors in host
+      // mode until the function is known-emitted.
+      if (getLangOpts().CUDAIsDevice)
+        return CUDADiagBuilder::K_Nop;
+
+      return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
+                 ? CUDADiagBuilder::K_Immediate
+                 : CUDADiagBuilder::K_Deferred;
+    default:
+      return CUDADiagBuilder::K_Nop;
+    }
+  }();
   return CUDADiagBuilder(DiagKind, Loc, DiagID,
                          dyn_cast<FunctionDecl>(CurContext), *this);
 }
 
+// Emit any deferred diagnostics for FD and erase them from the map in which
+// they're stored.
+static void EmitDeferredDiags(Sema &S, FunctionDecl *FD) {
+  auto It = S.CUDADeferredDiags.find(FD);
+  if (It == S.CUDADeferredDiags.end())
+    return;
+  for (PartialDiagnosticAt &PDAt : It->second) {
+    const SourceLocation &Loc = PDAt.first;
+    const PartialDiagnostic &PD = PDAt.second;
+    DiagnosticBuilder Builder(S.Diags.Report(Loc, PD.getDiagID()));
+    Builder.setForceEmit();
+    PD.Emit(Builder);
+  }
+  S.CUDADeferredDiags.erase(It);
+}
+
+// Indicate that this function (and thus everything it transitively calls) will
+// be codegen'ed, and emit any deferred diagnostics on this function and its
+// (transitive) callees.
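+//
+// For instance (an illustrative sketch, not part of the original patch): if a
+// known-emitted __global__ kernel K calls HD function A, and A in turn calls
+// HD function B, then marking K emitted also marks A and B emitted and flushes
+// any diagnostics that were deferred on A and B.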
+static void MarkKnownEmitted(Sema &S, FunctionDecl *FD) {
+  // Nothing to do if we already know that FD is emitted.
+  if (IsKnownEmitted(S, FD)) {
+    assert(!S.CUDACallGraph.count(FD));
+    return;
+  }
+
+  // We've just discovered that FD is known-emitted. Walk our call graph to see
+  // what else we can now discover also must be emitted.
+  llvm::SmallVector<FunctionDecl *, 4> Worklist = {FD};
+  llvm::SmallSet<FunctionDecl *, 4> Seen;
+  Seen.insert(FD);
+  while (!Worklist.empty()) {
+    FunctionDecl *Caller = Worklist.pop_back_val();
+    assert(!IsKnownEmitted(S, Caller) &&
+           "Worklist should not contain known-emitted functions.");
+    S.CUDAKnownEmittedFns.insert(Caller);
+    EmitDeferredDiags(S, Caller);
+
+    // Deferred diags are often emitted on the template itself, so emit those as
+    // well.
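+    // (A diagnostic deferred while checking a templated function's definition
+    // is attached to the template pattern's FunctionDecl rather than to the
+    // instantiation being walked here.)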
+    if (auto *Templ = Caller->getPrimaryTemplate())
+      EmitDeferredDiags(S, Templ->getAsFunction());
+
+    // Add all functions called by Caller to our worklist.
+    auto CGIt = S.CUDACallGraph.find(Caller);
+    if (CGIt == S.CUDACallGraph.end())
+      continue;
+
+    for (FunctionDecl *Callee : CGIt->second) {
+      if (Seen.count(Callee) || IsKnownEmitted(S, Callee))
+        continue;
+      Seen.insert(Callee);
+      Worklist.push_back(Callee);
+    }
+
+    // Caller is now known-emitted, so we no longer need to maintain its list of
+    // callees in CUDACallGraph.
+    S.CUDACallGraph.erase(CGIt);
+  }
+}
+
 bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
   assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
   assert(Callee && "Callee may not be null.");
+  // FIXME: Is bailing out early correct here? Should we instead assume that
+  // the caller is a global initializer?
   FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
   if (!Caller)
     return true;
 
-  CUDADiagBuilder::Kind DiagKind;
-  switch (IdentifyCUDAPreference(Caller, Callee)) {
-  case CFP_Never:
-    DiagKind = CUDADiagBuilder::K_Immediate;
-    break;
-  case CFP_WrongSide:
-    assert(Caller && "WrongSide calls require a non-null caller");
-    DiagKind = CUDADiagBuilder::K_Deferred;
-    break;
-  default:
-    DiagKind = CUDADiagBuilder::K_Nop;
-  }
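+  // If the caller is known-emitted, everything it calls is known-emitted too:
+  // mark Callee (and, transitively, its callees) known-emitted now, flushing
+  // their deferred diagnostics. Otherwise, just record the call edge so we can
+  // propagate emission later if the caller becomes known-emitted.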
+  bool CallerKnownEmitted = IsKnownEmitted(*this, Caller);
+  if (CallerKnownEmitted)
+    MarkKnownEmitted(*this, Callee);
+  else
+    CUDACallGraph[Caller].insert(Callee);
+
+  CUDADiagBuilder::Kind DiagKind = [&] {
+    switch (IdentifyCUDAPreference(Caller, Callee)) {
+    case CFP_Never:
+      return CUDADiagBuilder::K_Immediate;
+    case CFP_WrongSide:
+      assert(Caller && "WrongSide calls require a non-null caller");
+      // If we know the caller will be emitted, we know this wrong-side call
+      // will be emitted, so it's an immediate error. Otherwise, defer the
+      // error until we know the caller is emitted.
+      return CallerKnownEmitted ? CUDADiagBuilder::K_Immediate
+                                : CUDADiagBuilder::K_Deferred;
+    default:
+      return CUDADiagBuilder::K_Nop;
+    }
+  }();
 
   // Avoid emitting this error twice for the same location. Using a hashtable
   // like this is unfortunate, but because we must continue parsing as normal