//===--- SemaCUDA.cpp - Semantic Analysis for CUDA constructs -------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// \brief This file implements semantic analysis for CUDA constructs.
///
//===----------------------------------------------------------------------===//

#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/ExprCXX.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Sema/Lookup.h"
#include "clang/Sema/Sema.h"
#include "clang/Sema/SemaDiagnostic.h"
#include "clang/Sema/SemaInternal.h"
#include "clang/Sema/Template.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
using namespace clang;

void Sema::PushForceCUDAHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  ForceCUDAHostDeviceDepth++;
}

bool Sema::PopForceCUDAHostDevice() {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (ForceCUDAHostDeviceDepth == 0)
    return false;
  ForceCUDAHostDeviceDepth--;
  return true;
}

ExprResult Sema::ActOnCUDAExecConfigExpr(Scope *S, SourceLocation LLLLoc,
                                         MultiExprArg ExecConfig,
                                         SourceLocation GGGLoc) {
  FunctionDecl *ConfigDecl = Context.getcudaConfigureCallDecl();
  if (!ConfigDecl)
    return ExprError(Diag(LLLLoc, diag::err_undeclared_var_use)
                     << "cudaConfigureCall");
  QualType ConfigQTy = ConfigDecl->getType();

  DeclRefExpr *ConfigDR = new (Context)
      DeclRefExpr(ConfigDecl, false, ConfigQTy, VK_LValue, LLLLoc);
  MarkFunctionReferenced(LLLLoc, ConfigDecl);

  return ActOnCallExpr(S, ConfigDR, LLLLoc, ExecConfig, GGGLoc, nullptr,
                       /*IsExecConfig=*/true);
}

Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const AttributeList *Attr) {
  bool HasHostAttr = false;
  bool HasDeviceAttr = false;
  bool HasGlobalAttr = false;
  bool HasInvalidTargetAttr = false;
  while (Attr) {
    switch (Attr->getKind()) {
    case AttributeList::AT_CUDAGlobal:
      HasGlobalAttr = true;
      break;
    case AttributeList::AT_CUDAHost:
      HasHostAttr = true;
      break;
    case AttributeList::AT_CUDADevice:
      HasDeviceAttr = true;
      break;
    case AttributeList::AT_CUDAInvalidTarget:
      HasInvalidTargetAttr = true;
      break;
    default:
      break;
    }
    Attr = Attr->getNext();
  }

  if (HasInvalidTargetAttr)
    return CFT_InvalidTarget;

  if (HasGlobalAttr)
    return CFT_Global;

  if (HasHostAttr && HasDeviceAttr)
    return CFT_HostDevice;

  if (HasDeviceAttr)
    return CFT_Device;

  return CFT_Host;
}

/// IdentifyCUDATarget - Determine the CUDA compilation target for this function
Sema::CUDAFunctionTarget Sema::IdentifyCUDATarget(const FunctionDecl *D) {
  // Code that lives outside a function is run on the host.
  if (D == nullptr)
    return CFT_Host;

  if (D->hasAttr<CUDAInvalidTargetAttr>())
    return CFT_InvalidTarget;

  if (D->hasAttr<CUDAGlobalAttr>())
    return CFT_Global;

  if (D->hasAttr<CUDADeviceAttr>()) {
    if (D->hasAttr<CUDAHostAttr>())
      return CFT_HostDevice;
    return CFT_Device;
  } else if (D->hasAttr<CUDAHostAttr>()) {
    return CFT_Host;
  } else if (D->isImplicit()) {
    // Some implicit declarations (like intrinsic functions) are not marked.
    // Set the most lenient target on them for maximal flexibility.
    return CFT_HostDevice;
  }

  return CFT_Host;
}

// * CUDA Call preference table
//
// F - from,
// T - to
// Ph - preference in host mode
// Pd - preference in device mode
// H  - handled in (x)
// Preferences: N:native, SS:same side, HD:host-device, WS:wrong side, --:never.
//
// | F  | T  | Ph  | Pd  |  H  |
// |----+----+-----+-----+-----+
// | d  | d  | N   | N   | (c) |
// | d  | g  | --  | --  | (a) |
// | d  | h  | --  | --  | (e) |
// | d  | hd | HD  | HD  | (b) |
// | g  | d  | N   | N   | (c) |
// | g  | g  | --  | --  | (a) |
// | g  | h  | --  | --  | (e) |
// | g  | hd | HD  | HD  | (b) |
// | h  | d  | --  | --  | (e) |
// | h  | g  | N   | N   | (c) |
// | h  | h  | N   | N   | (c) |
// | h  | hd | HD  | HD  | (b) |
// | hd | d  | WS  | SS  | (d) |
// | hd | g  | SS  | --  |(d/a)|
// | hd | h  | SS  | WS  | (d) |
// | hd | hd | HD  | HD  | (b) |
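//
// For instance (an illustrative sketch, not code from this file):
//
//   __device__ int dev();
//   __global__ void kern() { dev(); }       // g -> d: native (N).
//   __host__ __device__ int hd() {
//     return dev();                         // hd -> d: SS in device mode,
//   }                                       //          WS in host mode.
//   void launch() { kern<<<1, 1>>>(); }     // h -> g: native (N).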
Sema::CUDAFunctionPreference
Sema::IdentifyCUDAPreference(const FunctionDecl *Caller,
                             const FunctionDecl *Callee) {
  assert(Callee && "Callee must be valid.");
  CUDAFunctionTarget CallerTarget = IdentifyCUDATarget(Caller);
  CUDAFunctionTarget CalleeTarget = IdentifyCUDATarget(Callee);

  // If one of the targets is invalid, the check always fails, no matter what
  // the other target is.
  if (CallerTarget == CFT_InvalidTarget || CalleeTarget == CFT_InvalidTarget)
    return CFP_Never;

  // (a) Can't call global from some contexts until we support CUDA's
  // dynamic parallelism.
  if (CalleeTarget == CFT_Global &&
      (CallerTarget == CFT_Global || CallerTarget == CFT_Device))
    return CFP_Never;

  // (b) Calling HostDevice is OK for everyone.
  if (CalleeTarget == CFT_HostDevice)
    return CFP_HostDevice;

  // (c) Best case scenarios
  if (CalleeTarget == CallerTarget ||
      (CallerTarget == CFT_Host && CalleeTarget == CFT_Global) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Device))
    return CFP_Native;

  // (d) HostDevice behavior depends on compilation mode.
  if (CallerTarget == CFT_HostDevice) {
    // It's OK to call a compilation-mode matching function from an HD one.
    if ((getLangOpts().CUDAIsDevice && CalleeTarget == CFT_Device) ||
        (!getLangOpts().CUDAIsDevice &&
         (CalleeTarget == CFT_Host || CalleeTarget == CFT_Global)))
      return CFP_SameSide;

    // Calls from HD to non-mode-matching functions (i.e., to host functions
    // when compiling in device mode or to device functions when compiling in
    // host mode) are allowed at the sema level, but eventually rejected if
    // they're ever codegened. TODO: Reject said calls earlier.
    return CFP_WrongSide;
  }

  // (e) Calling across device/host boundary is not something you should do.
  if ((CallerTarget == CFT_Host && CalleeTarget == CFT_Device) ||
      (CallerTarget == CFT_Device && CalleeTarget == CFT_Host) ||
      (CallerTarget == CFT_Global && CalleeTarget == CFT_Host))
    return CFP_Never;

  llvm_unreachable("All cases should've been handled by now.");
}

void Sema::EraseUnwantedCUDAMatches(
    const FunctionDecl *Caller,
    SmallVectorImpl<std::pair<DeclAccessPair, FunctionDecl *>> &Matches) {
  if (Matches.size() <= 1)
    return;

  using Pair = std::pair<DeclAccessPair, FunctionDecl*>;

  // Gets the CUDA function preference for a call from Caller to Match.
  auto GetCFP = [&](const Pair &Match) {
    return IdentifyCUDAPreference(Caller, Match.second);
  };

  // Find the best call preference among the functions in Matches.
  CUDAFunctionPreference BestCFP = GetCFP(*std::max_element(
      Matches.begin(), Matches.end(),
      [&](const Pair &M1, const Pair &M2) { return GetCFP(M1) < GetCFP(M2); }));

  // Erase all functions with lower priority.
  Matches.erase(
      llvm::remove_if(
          Matches, [&](const Pair &Match) { return GetCFP(Match) < BestCFP; }),
      Matches.end());
}
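
// Illustrative sketch (not code from this file): if Matches contains both a
// __host__ and a __device__ candidate and Caller is a __device__ function, the
// __device__ candidate has preference CFP_Native while the __host__ one has
// CFP_Never, so only the __device__ candidate survives the erase above.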

/// When an implicitly-declared special member has to invoke more than one
/// base/field special member, conflicts may occur in the targets of these
/// members. For example, if one base's member is __host__ and another's is
/// __device__, it's a conflict.
/// This function figures out if the given targets \param Target1 and
/// \param Target2 conflict, and if they do not it fills in
/// \param ResolvedTarget with a target that resolves for both calls.
/// \return true if there's a conflict, false otherwise.
static bool
resolveCalleeCUDATargetConflict(Sema::CUDAFunctionTarget Target1,
                                Sema::CUDAFunctionTarget Target2,
                                Sema::CUDAFunctionTarget *ResolvedTarget) {
  // Only free functions and static member functions may be global.
  assert(Target1 != Sema::CFT_Global);
  assert(Target2 != Sema::CFT_Global);
  if (Target1 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target2;
  } else if (Target2 == Sema::CFT_HostDevice) {
    *ResolvedTarget = Target1;
  } else if (Target1 != Target2) {
    return true;
  } else {
    *ResolvedTarget = Target1;
  }
  return false;
}
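
// For example (an illustrative sketch, not code from this file):
//
//   struct A { __host__ __device__ A(); };
//   struct B { __device__ B(); };
//   struct C : A, B {};   // HD and D resolve to D: C's implicit default
//                         // constructor is inferred as __device__.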

bool Sema::inferCUDATargetForImplicitSpecialMember(CXXRecordDecl *ClassDecl,
                                                   CXXSpecialMember CSM,
                                                   CXXMethodDecl *MemberDecl,
                                                   bool ConstRHS,
                                                   bool Diagnose) {
  llvm::Optional<CUDAFunctionTarget> InferredTarget;

  // We're going to invoke special member lookup; mark that these special
  // members are called from this one, and not from its caller.
  ContextRAII MethodContext(*this, MemberDecl);

  // Look for special members in base classes that should be invoked from here.
  // Infer the target of this member based on the ones it should call.
  // Skip direct and indirect virtual bases for abstract classes.
  llvm::SmallVector<const CXXBaseSpecifier *, 16> Bases;
  for (const auto &B : ClassDecl->bases()) {
    if (!B.isVirtual()) {
      Bases.push_back(&B);
    }
  }

  if (!ClassDecl->isAbstract()) {
    for (const auto &VB : ClassDecl->vbases()) {
      Bases.push_back(&VB);
    }
  }

  for (const auto *B : Bases) {
    const RecordType *BaseType = B->getType()->getAs<RecordType>();
    if (!BaseType) {
      continue;
    }

    CXXRecordDecl *BaseClassDecl = cast<CXXRecordDecl>(BaseType->getDecl());
    Sema::SpecialMemberOverloadResult *SMOR =
        LookupSpecialMember(BaseClassDecl, CSM,
                            /* ConstArg */ ConstRHS,
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR || !SMOR->getMethod()) {
      continue;
    }

    CUDAFunctionTarget BaseMethodTarget = IdentifyCUDATarget(SMOR->getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = BaseMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), BaseMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue() << BaseMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  // Same as for bases, but now for special members of fields.
  for (const auto *F : ClassDecl->fields()) {
    if (F->isInvalidDecl()) {
      continue;
    }

    const RecordType *FieldType =
        Context.getBaseElementType(F->getType())->getAs<RecordType>();
    if (!FieldType) {
      continue;
    }

    CXXRecordDecl *FieldRecDecl = cast<CXXRecordDecl>(FieldType->getDecl());
    Sema::SpecialMemberOverloadResult *SMOR =
        LookupSpecialMember(FieldRecDecl, CSM,
                            /* ConstArg */ ConstRHS && !F->isMutable(),
                            /* VolatileArg */ false,
                            /* RValueThis */ false,
                            /* ConstThis */ false,
                            /* VolatileThis */ false);

    if (!SMOR || !SMOR->getMethod()) {
      continue;
    }

    CUDAFunctionTarget FieldMethodTarget =
        IdentifyCUDATarget(SMOR->getMethod());
    if (!InferredTarget.hasValue()) {
      InferredTarget = FieldMethodTarget;
    } else {
      bool ResolutionError = resolveCalleeCUDATargetConflict(
          InferredTarget.getValue(), FieldMethodTarget,
          InferredTarget.getPointer());
      if (ResolutionError) {
        if (Diagnose) {
          Diag(ClassDecl->getLocation(),
               diag::note_implicit_member_target_infer_collision)
              << (unsigned)CSM << InferredTarget.getValue()
              << FieldMethodTarget;
        }
        MemberDecl->addAttr(CUDAInvalidTargetAttr::CreateImplicit(Context));
        return true;
      }
    }
  }

  if (InferredTarget.hasValue()) {
    if (InferredTarget.getValue() == CFT_Device) {
      MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    } else if (InferredTarget.getValue() == CFT_Host) {
      MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
    } else {
      MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
      MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
    }
  } else {
    // If no target was inferred, mark this member as __host__ __device__;
    // it's the least restrictive option that can be invoked from any target.
    MemberDecl->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    MemberDecl->addAttr(CUDAHostAttr::CreateImplicit(Context));
  }

  return false;
}
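
// Illustrative sketch (not code from this file) of a conflicting inference:
//
//   struct X { __host__ X(); };
//   struct Y { __device__ Y(); };
//   struct Bad : X, Y {};
//
// Bad's implicit default constructor would have to call a __host__-only and a
// __device__-only constructor, so it is marked with CUDAInvalidTargetAttr and,
// if Diagnose is set, a note_implicit_member_target_infer_collision is emitted.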

bool Sema::isEmptyCudaConstructor(SourceLocation Loc, CXXConstructorDecl *CD) {
  if (!CD->isDefined() && CD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, CD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A constructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial constructor
  if (CD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The constructor function has been defined.
  // The constructor function has no parameters,
  // and the function body is an empty compound statement.
  if (!(CD->hasTrivialBody() && CD->getNumParams() == 0))
    return false;

  // Its class has no virtual functions and no virtual base classes.
  if (CD->getParent()->isDynamicClass())
    return false;

  // The only form of initializer allowed is an empty constructor.
  // This will recursively check all base classes and member initializers.
  if (!llvm::all_of(CD->inits(), [&](const CXXCtorInitializer *CI) {
        if (const CXXConstructExpr *CE =
                dyn_cast<CXXConstructExpr>(CI->getInit()))
          return isEmptyCudaConstructor(Loc, CE->getConstructor());
        return false;
      }))
    return false;

  return true;
}
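
// Illustrative sketch (not code from this file):
//
//   struct Empty { __device__ Empty() {} };   // Empty: trivial body, no
//                                             // params, no virtual bases,
//                                             // no initializers.
//   struct NonEmpty {
//     int x;
//     __device__ NonEmpty() : x(42) {}        // Not empty: has a member
//   };                                        // initializer.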

bool Sema::isEmptyCudaDestructor(SourceLocation Loc, CXXDestructorDecl *DD) {
  // No destructor -> no problem.
  if (!DD)
    return true;

  if (!DD->isDefined() && DD->isTemplateInstantiation())
    InstantiateFunctionDefinition(Loc, DD->getFirstDecl());

  // (E.2.3.1, CUDA 7.5) A destructor for a class type is considered
  // empty at a point in the translation unit, if it is either a
  // trivial destructor
  if (DD->isTrivial())
    return true;

  // ... or it satisfies all of the following conditions:
  // The destructor function has been defined,
  // and the function body is an empty compound statement.
  if (!DD->hasTrivialBody())
    return false;

  const CXXRecordDecl *ClassDecl = DD->getParent();

  // Its class has no virtual functions and no virtual base classes.
  if (ClassDecl->isDynamicClass())
    return false;

  // Only empty destructors are allowed. This will recursively check
  // destructors for all base classes...
  if (!llvm::all_of(ClassDecl->bases(), [&](const CXXBaseSpecifier &BS) {
        if (CXXRecordDecl *RD = BS.getType()->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  // ... and member fields.
  if (!llvm::all_of(ClassDecl->fields(), [&](const FieldDecl *Field) {
        if (CXXRecordDecl *RD = Field->getType()
                                    ->getBaseElementTypeUnsafe()
                                    ->getAsCXXRecordDecl())
          return isEmptyCudaDestructor(Loc, RD->getDestructor());
        return true;
      }))
    return false;

  return true;
}

// With -fcuda-host-device-constexpr, an unattributed constexpr function is
// treated as implicitly __host__ __device__, unless:
//  * it is a variadic function (device-side variadic functions are not
//    allowed), or
//  * a __device__ function with this signature was already declared, in which
//    case we output an error, unless the __device__ decl is in a system
//    header, in which case we leave the constexpr function unattributed.
//
// In addition, all function decls are treated as __host__ __device__ when
// ForceCUDAHostDeviceDepth > 0 (corresponding to code within a
//   #pragma clang force_cuda_host_device_begin/end
// pair).
void Sema::maybeAddCUDAHostDeviceAttrs(FunctionDecl *NewD,
                                       const LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");

  if (ForceCUDAHostDeviceDepth > 0) {
    if (!NewD->hasAttr<CUDAHostAttr>())
      NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
    if (!NewD->hasAttr<CUDADeviceAttr>())
      NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    return;
  }

  if (!getLangOpts().CUDAHostDeviceConstexpr || !NewD->isConstexpr() ||
      NewD->isVariadic() || NewD->hasAttr<CUDAHostAttr>() ||
      NewD->hasAttr<CUDADeviceAttr>() || NewD->hasAttr<CUDAGlobalAttr>())
    return;

  // Is D a __device__ function with the same signature as NewD, ignoring CUDA
  // attributes?
  auto IsMatchingDeviceFn = [&](NamedDecl *D) {
    if (UsingShadowDecl *Using = dyn_cast<UsingShadowDecl>(D))
      D = Using->getTargetDecl();
    FunctionDecl *OldD = D->getAsFunction();
    return OldD && OldD->hasAttr<CUDADeviceAttr>() &&
           !OldD->hasAttr<CUDAHostAttr>() &&
           !IsOverload(NewD, OldD, /* UseMemberUsingDeclRules = */ false,
                       /* ConsiderCudaAttrs = */ false);
  };
  auto It = llvm::find_if(Previous, IsMatchingDeviceFn);
  if (It != Previous.end()) {
    // We found a __device__ function with the same name and signature as NewD
    // (ignoring CUDA attrs). This is an error unless that function is defined
    // in a system header, in which case we simply return without making NewD
    // host+device.
    NamedDecl *Match = *It;
    if (!getSourceManager().isInSystemHeader(Match->getLocation())) {
      Diag(NewD->getLocation(),
           diag::err_cuda_unattributed_constexpr_cannot_overload_device)
          << NewD->getName();
      Diag(Match->getLocation(),
           diag::note_cuda_conflicting_device_function_declared_here);
    }
    return;
  }

  NewD->addAttr(CUDAHostAttr::CreateImplicit(Context));
  NewD->addAttr(CUDADeviceAttr::CreateImplicit(Context));
}
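
// Illustrative sketch (not code from this file) of the constexpr rule above,
// assuming -fcuda-host-device-constexpr is in effect:
//
//   constexpr int sq(int x) { return x * x; }   // Implicitly __host__ __device__.
//
//   __device__ int pick(int x);
//   constexpr int pick(int x) { return x; }     // Error (or, if the __device__
//                                               // decl is in a system header,
//                                               // left unattributed).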

// In CUDA, there are some constructs which may appear in semantically-valid
// code, but trigger errors if we ever generate code for the function in which
// they appear. Essentially every construct you're not allowed to use on the
// device falls into this category, because you are allowed to use these
// constructs in a __host__ __device__ function, but only if that function is
// never codegen'ed on the device.
//
// To handle semantic checking for these constructs, we keep track of the set of
// functions we know will be emitted, either because we could tell a priori that
// they would be emitted, or because they were transitively called by a
// known-emitted function.
//
// We also keep a partial call graph of which not-known-emitted functions call
// which other not-known-emitted functions.
//
// When we see something which is illegal if the current function is emitted
// (usually by way of CUDADiagIfDeviceCode, CUDADiagIfHostCode, or
// CheckCUDACall), we first check if the current function is known-emitted. If
// so, we immediately output the diagnostic.
//
// Otherwise, we "defer" the diagnostic. It sits in Sema::CUDADeferredDiags
// until we discover that the function is known-emitted, at which point we take
// it out of this map and emit the diagnostic.
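//
// For example (an illustrative sketch, not code from this file):
//
//   __host__ __device__ void hd(int n) {
//     int vla[n];   // Not allowed in device code. Diagnosed immediately if
//                   // hd() is already known-emitted for the device; otherwise
//                   // deferred, and dropped if hd() is never codegen'ed for
//                   // the device.
//   }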

Sema::CUDADiagBuilder::CUDADiagBuilder(Kind K, SourceLocation Loc,
                                       unsigned DiagID, FunctionDecl *Fn,
                                       Sema &S)
    : S(S), Loc(Loc), DiagID(DiagID), Fn(Fn),
      ShowCallStack(K == K_ImmediateWithCallStack || K == K_Deferred) {
  switch (K) {
  case K_Nop:
    break;
  case K_Immediate:
  case K_ImmediateWithCallStack:
    ImmediateDiag.emplace(S.Diag(Loc, DiagID));
    break;
  case K_Deferred:
    assert(Fn && "Must have a function to attach the deferred diag to.");
    PartialDiag.emplace(S.PDiag(DiagID));
    break;
  }
}

// Print notes showing how we can reach FD starting from an a priori
// known-callable function.
static void EmitCallStackNotes(Sema &S, FunctionDecl *FD) {
  auto FnIt = S.CUDAKnownEmittedFns.find(FD);
  while (FnIt != S.CUDAKnownEmittedFns.end()) {
    DiagnosticBuilder Builder(
        S.Diags.Report(FnIt->second.Loc, diag::note_called_by));
    Builder << FnIt->second.FD;
    Builder.setForceEmit();

    FnIt = S.CUDAKnownEmittedFns.find(FnIt->second.FD);
  }
}

Sema::CUDADiagBuilder::~CUDADiagBuilder() {
  if (ImmediateDiag) {
    // Emit our diagnostic and, if it was a warning or error, output a callstack
    // if Fn isn't a priori known-emitted.
    bool IsWarningOrError = S.getDiagnostics().getDiagnosticLevel(
                                DiagID, Loc) >= DiagnosticsEngine::Warning;
    ImmediateDiag.reset(); // Emit the immediate diag.
    if (IsWarningOrError && ShowCallStack)
      EmitCallStackNotes(S, Fn);
  } else if (PartialDiag) {
    assert(ShowCallStack && "Must always show call stack for deferred diags.");
    S.CUDADeferredDiags[Fn].push_back({Loc, std::move(*PartialDiag)});
  }
}

// Do we know that we will eventually codegen the given function?
static bool IsKnownEmitted(Sema &S, FunctionDecl *FD) {
  // Templates are emitted when they're instantiated.
  if (FD->isDependentContext())
    return false;

  // When compiling for device, host functions are never emitted. Similarly,
  // when compiling for host, device and global functions are never emitted.
  // (Technically, we do emit a host-side stub for global functions, but this
  // doesn't count for our purposes here.)
  Sema::CUDAFunctionTarget T = S.IdentifyCUDATarget(FD);
  if (S.getLangOpts().CUDAIsDevice && T == Sema::CFT_Host)
    return false;
  if (!S.getLangOpts().CUDAIsDevice &&
      (T == Sema::CFT_Device || T == Sema::CFT_Global))
    return false;

  // Check whether this function is externally visible -- if so, it's
  // known-emitted.
  //
  // We have to check the GVA linkage of the function's *definition* -- if we
  // only have a declaration, we don't know whether or not the function will be
  // emitted, because (say) the definition could include "inline".
  FunctionDecl *Def = FD->getDefinition();

  // We may currently be parsing the body of FD, in which case
  // FD->getDefinition() will be null, but we still want to treat FD as though
  // it's a definition.
  if (!Def && FD->willHaveBody())
    Def = FD;

  if (Def &&
      !isDiscardableGVALinkage(S.getASTContext().GetGVALinkageForFunction(Def)))
    return true;

  // Otherwise, the function is known-emitted if it's in our set of
  // known-emitted functions.
  return S.CUDAKnownEmittedFns.count(FD) > 0;
}
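
// Illustrative sketch (not code from this file), when compiling for the device:
//
//   __device__ void ext() {}             // External linkage, non-discardable
//                                        // GVA linkage: known-emitted as soon
//                                        // as it is defined.
//   static __device__ void helper() {}   // Discardable: becomes known-emitted
//                                        // only once a known-emitted function
//                                        // calls it (see MarkKnownEmitted).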

Sema::CUDADiagBuilder Sema::CUDADiagIfDeviceCode(SourceLocation Loc,
                                                 unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  CUDADiagBuilder::Kind DiagKind = [&] {
    switch (CurrentCUDATarget()) {
    case CFT_Global:
    case CFT_Device:
      return CUDADiagBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice) {
        return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
                   ? CUDADiagBuilder::K_ImmediateWithCallStack
                   : CUDADiagBuilder::K_Deferred;
      }
      return CUDADiagBuilder::K_Nop;
    default:
      return CUDADiagBuilder::K_Nop;
    }
  }();
  return CUDADiagBuilder(DiagKind, Loc, DiagID,
                         dyn_cast<FunctionDecl>(CurContext), *this);
}

Sema::CUDADiagBuilder Sema::CUDADiagIfHostCode(SourceLocation Loc,
                                               unsigned DiagID) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  CUDADiagBuilder::Kind DiagKind = [&] {
    switch (CurrentCUDATarget()) {
    case CFT_Host:
      return CUDADiagBuilder::K_Immediate;
    case CFT_HostDevice:
      // An HD function counts as host code if we're compiling for host, and
      // device code if we're compiling for device. Defer any errors in device
      // mode until the function is known-emitted.
      if (getLangOpts().CUDAIsDevice)
        return CUDADiagBuilder::K_Nop;

      return IsKnownEmitted(*this, dyn_cast<FunctionDecl>(CurContext))
                 ? CUDADiagBuilder::K_ImmediateWithCallStack
                 : CUDADiagBuilder::K_Deferred;
    default:
      return CUDADiagBuilder::K_Nop;
    }
  }();
  return CUDADiagBuilder(DiagKind, Loc, DiagID,
                         dyn_cast<FunctionDecl>(CurContext), *this);
}
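
// Typical use of the two builders above, mirroring the usage example documented
// in Sema.h (err_cuda_vla is the diagnostic for variable-length arrays):
//
//   // Variable-length arrays are not allowed in CUDA device code.
//   if (CUDADiagIfDeviceCode(Loc, diag::err_cuda_vla) << CurrentCUDATarget())
//     return ExprError();
//   // Otherwise, continue parsing as normal.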

// Emit any deferred diagnostics for FD and erase them from the map in which
// they're stored.
static void EmitDeferredDiags(Sema &S, FunctionDecl *FD) {
  auto It = S.CUDADeferredDiags.find(FD);
  if (It == S.CUDADeferredDiags.end())
    return;
  bool HasWarningOrError = false;
  for (PartialDiagnosticAt &PDAt : It->second) {
    const SourceLocation &Loc = PDAt.first;
    const PartialDiagnostic &PD = PDAt.second;
    HasWarningOrError |= S.getDiagnostics().getDiagnosticLevel(
                             PD.getDiagID(), Loc) >= DiagnosticsEngine::Warning;
    DiagnosticBuilder Builder(S.Diags.Report(Loc, PD.getDiagID()));
    Builder.setForceEmit();
    PD.Emit(Builder);
  }
  S.CUDADeferredDiags.erase(It);

  // FIXME: Should this be called after every warning/error emitted in the loop
  // above, instead of just once per function? That would be consistent with
  // how we handle immediate errors, but it also seems like a bit much.
  if (HasWarningOrError)
    EmitCallStackNotes(S, FD);
}

// Indicate that this function (and thus everything it transitively calls) will
// be codegen'ed, and emit any deferred diagnostics on this function and its
// (transitive) callees.
static void MarkKnownEmitted(Sema &S, FunctionDecl *OrigCaller,
                             FunctionDecl *OrigCallee, SourceLocation OrigLoc) {
  // Nothing to do if we already know that FD is emitted.
  if (IsKnownEmitted(S, OrigCallee)) {
    assert(!S.CUDACallGraph.count(OrigCallee));
    return;
  }

  // We've just discovered that OrigCallee is known-emitted. Walk our call
  // graph to see what else we can now discover also must be emitted.
  struct CallInfo {
    FunctionDecl *Caller;
    FunctionDecl *Callee;
    SourceLocation Loc;
  };
  llvm::SmallVector<CallInfo, 4> Worklist = {{OrigCaller, OrigCallee, OrigLoc}};
  llvm::SmallSet<CanonicalDeclPtr<FunctionDecl>, 4> Seen;
  Seen.insert(OrigCallee);
  while (!Worklist.empty()) {
    CallInfo C = Worklist.pop_back_val();
    assert(!IsKnownEmitted(S, C.Callee) &&
           "Worklist should not contain known-emitted functions.");
    S.CUDAKnownEmittedFns[C.Callee] = {C.Caller, C.Loc};
    EmitDeferredDiags(S, C.Callee);

    // If this is a template instantiation, explore its callgraph as well:
    // Non-dependent calls are part of the template's callgraph, while dependent
    // calls are part of the instantiation's call graph.
    if (auto *Templ = C.Callee->getPrimaryTemplate()) {
      FunctionDecl *TemplFD = Templ->getAsFunction();
      if (!Seen.count(TemplFD) && !S.CUDAKnownEmittedFns.count(TemplFD)) {
        Seen.insert(TemplFD);
        Worklist.push_back(
            {/* Caller = */ C.Caller, /* Callee = */ TemplFD, C.Loc});
      }
    }

    // Add all functions called by Callee to our worklist.
    auto CGIt = S.CUDACallGraph.find(C.Callee);
    if (CGIt == S.CUDACallGraph.end())
      continue;

    for (std::pair<CanonicalDeclPtr<FunctionDecl>, SourceLocation> FDLoc :
         CGIt->second) {
      FunctionDecl *NewCallee = FDLoc.first;
      SourceLocation CallLoc = FDLoc.second;
      if (Seen.count(NewCallee) || IsKnownEmitted(S, NewCallee))
        continue;
      Seen.insert(NewCallee);
      Worklist.push_back(
          {/* Caller = */ C.Callee, /* Callee = */ NewCallee, CallLoc});
    }

    // C.Callee is now known-emitted, so we no longer need to maintain its list
    // of callees in CUDACallGraph.
    S.CUDACallGraph.erase(CGIt);
  }
}

bool Sema::CheckCUDACall(SourceLocation Loc, FunctionDecl *Callee) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  assert(Callee && "Callee may not be null.");
  // FIXME: Is bailing out early correct here? Should we instead assume that
  // the caller is a global initializer?
  FunctionDecl *Caller = dyn_cast<FunctionDecl>(CurContext);
  if (!Caller)
    return true;

  // If the caller is known-emitted, mark the callee as known-emitted.
  // Otherwise, mark the call in our call graph so we can traverse it later.
  bool CallerKnownEmitted = IsKnownEmitted(*this, Caller);
  if (CallerKnownEmitted)
    MarkKnownEmitted(*this, Caller, Callee, Loc);
  else {
    // If we have
    //   host fn calls kernel fn calls host+device,
    // the HD function does not get instantiated on the host. We model this by
    // omitting the call to the kernel from the call graph. This ensures
    // that, when compiling for host, only HD functions actually called from the
    // host get marked as known-emitted.
    if (getLangOpts().CUDAIsDevice || IdentifyCUDATarget(Callee) != CFT_Global)
      CUDACallGraph[Caller].insert({Callee, Loc});
  }

  CUDADiagBuilder::Kind DiagKind = [&] {
    switch (IdentifyCUDAPreference(Caller, Callee)) {
    case CFP_Never:
      return CUDADiagBuilder::K_Immediate;
    case CFP_WrongSide:
      assert(Caller && "WrongSide calls require a non-null caller");
      // If we know the caller will be emitted, we know this wrong-side call
      // will be emitted, so it's an immediate error. Otherwise, defer the
      // error until we know the caller is emitted.
      return CallerKnownEmitted ? CUDADiagBuilder::K_ImmediateWithCallStack
                                : CUDADiagBuilder::K_Deferred;
    default:
      return CUDADiagBuilder::K_Nop;
    }
  }();

  if (DiagKind == CUDADiagBuilder::K_Nop)
    return true;

  // Avoid emitting this error twice for the same location. Using a hashtable
  // like this is unfortunate, but because we must continue parsing as normal
  // after encountering a deferred error, it's otherwise very tricky for us to
  // ensure that we only emit this deferred error once.
  if (!LocsWithCUDACallDiags.insert({Caller, Loc}).second)
    return true;

  CUDADiagBuilder(DiagKind, Loc, diag::err_ref_bad_target, Caller, *this)
      << IdentifyCUDATarget(Callee) << Callee << IdentifyCUDATarget(Caller);
  CUDADiagBuilder(DiagKind, Callee->getLocation(), diag::note_previous_decl,
                  Caller, *this)
      << Callee;
  return DiagKind != CUDADiagBuilder::K_Immediate &&
         DiagKind != CUDADiagBuilder::K_ImmediateWithCallStack;
}
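
// Illustrative sketch (not code from this file) of the wrong-side handling in
// CheckCUDACall:
//
//   __device__ int dev();
//   __host__ __device__ int hd() { return dev(); }
//
// When compiling for the host, the call hd() -> dev() is CFP_WrongSide: the
// diagnostic is deferred and only surfaces if hd() later becomes known-emitted
// on the host.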

void Sema::CUDASetLambdaAttrs(CXXMethodDecl *Method) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  if (Method->hasAttr<CUDAHostAttr>() || Method->hasAttr<CUDADeviceAttr>())
    return;
  FunctionDecl *CurFn = dyn_cast<FunctionDecl>(CurContext);
  if (!CurFn)
    return;
  CUDAFunctionTarget Target = IdentifyCUDATarget(CurFn);
  if (Target == CFT_Global || Target == CFT_Device) {
    Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
  } else if (Target == CFT_HostDevice) {
    Method->addAttr(CUDADeviceAttr::CreateImplicit(Context));
    Method->addAttr(CUDAHostAttr::CreateImplicit(Context));
  }
}
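
// Illustrative sketch (not code from this file):
//
//   __device__ void d() {
//     auto l = [] { return 1; };   // The lambda's operator() is implicitly
//                                  // given __device__, matching the enclosing
//                                  // function's target.
//   }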

void Sema::checkCUDATargetOverload(FunctionDecl *NewFD,
                                   LookupResult &Previous) {
  assert(getLangOpts().CUDA && "Should only be called during CUDA compilation");
  CUDAFunctionTarget NewTarget = IdentifyCUDATarget(NewFD);
  for (NamedDecl *OldND : Previous) {
    FunctionDecl *OldFD = OldND->getAsFunction();
    if (!OldFD)
      continue;

    CUDAFunctionTarget OldTarget = IdentifyCUDATarget(OldFD);
    // Don't allow HD and global functions to overload other functions with the
    // same signature. We allow overloading based on CUDA attributes so that
    // functions can have different implementations on the host and device, but
    // HD/global functions "exist" in some sense on both the host and device, so
    // should have the same implementation on both sides.
    if (NewTarget != OldTarget &&
        ((NewTarget == CFT_HostDevice) || (OldTarget == CFT_HostDevice) ||
         (NewTarget == CFT_Global) || (OldTarget == CFT_Global)) &&
        !IsOverload(NewFD, OldFD, /* UseMemberUsingDeclRules = */ false,
                    /* ConsiderCudaAttrs = */ false)) {
      Diag(NewFD->getLocation(), diag::err_cuda_ovl_target)
          << NewTarget << NewFD->getDeclName() << OldTarget << OldFD;
      Diag(OldFD->getLocation(), diag::note_previous_declaration);
      NewFD->setInvalidDecl();
      break;
    }
  }
}
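
// Illustrative sketch (not code from this file) of the rule above:
//
//   __device__ int f(int);
//   __host__ int f(int);              // OK: H and D overloads may coexist.
//   __host__ __device__ int f(int);   // err_cuda_ovl_target: an HD function
//                                     // may not overload a same-signature H
//                                     // or D function.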