AMDGPU.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350
  1. //===--- AMDGPU.cpp - Implement AMDGPU target feature support -------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements AMDGPU TargetInfo objects.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "AMDGPU.h"
  13. #include "clang/Basic/Builtins.h"
  14. #include "clang/Basic/CodeGenOptions.h"
  15. #include "clang/Basic/LangOptions.h"
  16. #include "clang/Basic/MacroBuilder.h"
  17. #include "clang/Basic/TargetBuiltins.h"
  18. #include "llvm/ADT/StringSwitch.h"
  19. #include "llvm/IR/DataLayout.h"
  20. using namespace clang;
  21. using namespace clang::targets;
  22. namespace clang {
  23. namespace targets {
  24. // If you edit the description strings, make sure you update
  25. // getPointerWidthV().
  26. static const char *const DataLayoutStringR600 =
  27. "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
  28. "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
  29. static const char *const DataLayoutStringAMDGCN =
  30. "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
  31. "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
  32. "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5"
  33. "-ni:7";
  34. const LangASMap AMDGPUTargetInfo::AMDGPUDefIsGenMap = {
  35. Generic, // Default
  36. Global, // opencl_global
  37. Local, // opencl_local
  38. Constant, // opencl_constant
  39. Private, // opencl_private
  40. Generic, // opencl_generic
  41. Global, // cuda_device
  42. Constant, // cuda_constant
  43. Local // cuda_shared
  44. };
  45. const LangASMap AMDGPUTargetInfo::AMDGPUDefIsPrivMap = {
  46. Private, // Default
  47. Global, // opencl_global
  48. Local, // opencl_local
  49. Constant, // opencl_constant
  50. Private, // opencl_private
  51. Generic, // opencl_generic
  52. Global, // cuda_device
  53. Constant, // cuda_constant
  54. Local // cuda_shared
  55. };
  56. } // namespace targets
  57. } // namespace clang
  58. const Builtin::Info AMDGPUTargetInfo::BuiltinInfo[] = {
  59. #define BUILTIN(ID, TYPE, ATTRS) \
  60. {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, nullptr},
  61. #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) \
  62. {#ID, TYPE, ATTRS, nullptr, ALL_LANGUAGES, FEATURE},
  63. #include "clang/Basic/BuiltinsAMDGPU.def"
  64. };
  65. const char *const AMDGPUTargetInfo::GCCRegNames[] = {
  66. "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v8",
  67. "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17",
  68. "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26",
  69. "v27", "v28", "v29", "v30", "v31", "v32", "v33", "v34", "v35",
  70. "v36", "v37", "v38", "v39", "v40", "v41", "v42", "v43", "v44",
  71. "v45", "v46", "v47", "v48", "v49", "v50", "v51", "v52", "v53",
  72. "v54", "v55", "v56", "v57", "v58", "v59", "v60", "v61", "v62",
  73. "v63", "v64", "v65", "v66", "v67", "v68", "v69", "v70", "v71",
  74. "v72", "v73", "v74", "v75", "v76", "v77", "v78", "v79", "v80",
  75. "v81", "v82", "v83", "v84", "v85", "v86", "v87", "v88", "v89",
  76. "v90", "v91", "v92", "v93", "v94", "v95", "v96", "v97", "v98",
  77. "v99", "v100", "v101", "v102", "v103", "v104", "v105", "v106", "v107",
  78. "v108", "v109", "v110", "v111", "v112", "v113", "v114", "v115", "v116",
  79. "v117", "v118", "v119", "v120", "v121", "v122", "v123", "v124", "v125",
  80. "v126", "v127", "v128", "v129", "v130", "v131", "v132", "v133", "v134",
  81. "v135", "v136", "v137", "v138", "v139", "v140", "v141", "v142", "v143",
  82. "v144", "v145", "v146", "v147", "v148", "v149", "v150", "v151", "v152",
  83. "v153", "v154", "v155", "v156", "v157", "v158", "v159", "v160", "v161",
  84. "v162", "v163", "v164", "v165", "v166", "v167", "v168", "v169", "v170",
  85. "v171", "v172", "v173", "v174", "v175", "v176", "v177", "v178", "v179",
  86. "v180", "v181", "v182", "v183", "v184", "v185", "v186", "v187", "v188",
  87. "v189", "v190", "v191", "v192", "v193", "v194", "v195", "v196", "v197",
  88. "v198", "v199", "v200", "v201", "v202", "v203", "v204", "v205", "v206",
  89. "v207", "v208", "v209", "v210", "v211", "v212", "v213", "v214", "v215",
  90. "v216", "v217", "v218", "v219", "v220", "v221", "v222", "v223", "v224",
  91. "v225", "v226", "v227", "v228", "v229", "v230", "v231", "v232", "v233",
  92. "v234", "v235", "v236", "v237", "v238", "v239", "v240", "v241", "v242",
  93. "v243", "v244", "v245", "v246", "v247", "v248", "v249", "v250", "v251",
  94. "v252", "v253", "v254", "v255", "s0", "s1", "s2", "s3", "s4",
  95. "s5", "s6", "s7", "s8", "s9", "s10", "s11", "s12", "s13",
  96. "s14", "s15", "s16", "s17", "s18", "s19", "s20", "s21", "s22",
  97. "s23", "s24", "s25", "s26", "s27", "s28", "s29", "s30", "s31",
  98. "s32", "s33", "s34", "s35", "s36", "s37", "s38", "s39", "s40",
  99. "s41", "s42", "s43", "s44", "s45", "s46", "s47", "s48", "s49",
  100. "s50", "s51", "s52", "s53", "s54", "s55", "s56", "s57", "s58",
  101. "s59", "s60", "s61", "s62", "s63", "s64", "s65", "s66", "s67",
  102. "s68", "s69", "s70", "s71", "s72", "s73", "s74", "s75", "s76",
  103. "s77", "s78", "s79", "s80", "s81", "s82", "s83", "s84", "s85",
  104. "s86", "s87", "s88", "s89", "s90", "s91", "s92", "s93", "s94",
  105. "s95", "s96", "s97", "s98", "s99", "s100", "s101", "s102", "s103",
  106. "s104", "s105", "s106", "s107", "s108", "s109", "s110", "s111", "s112",
  107. "s113", "s114", "s115", "s116", "s117", "s118", "s119", "s120", "s121",
  108. "s122", "s123", "s124", "s125", "s126", "s127", "exec", "vcc", "scc",
  109. "m0", "flat_scratch", "exec_lo", "exec_hi", "vcc_lo", "vcc_hi",
  110. "flat_scratch_lo", "flat_scratch_hi"
  111. };
  112. ArrayRef<const char *> AMDGPUTargetInfo::getGCCRegNames() const {
  113. return llvm::makeArrayRef(GCCRegNames);
  114. }
  115. bool AMDGPUTargetInfo::initFeatureMap(
  116. llvm::StringMap<bool> &Features, DiagnosticsEngine &Diags, StringRef CPU,
  117. const std::vector<std::string> &FeatureVec) const {
  118. using namespace llvm::AMDGPU;
  119. // XXX - What does the member GPU mean if device name string passed here?
  120. if (isAMDGCN(getTriple())) {
  121. switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) {
  122. case GK_GFX1012:
  123. case GK_GFX1011:
  124. Features["dot1-insts"] = true;
  125. Features["dot2-insts"] = true;
  126. Features["dot5-insts"] = true;
  127. Features["dot6-insts"] = true;
  128. LLVM_FALLTHROUGH;
  129. case GK_GFX1010:
  130. Features["dl-insts"] = true;
  131. Features["ci-insts"] = true;
  132. Features["flat-address-space"] = true;
  133. Features["16-bit-insts"] = true;
  134. Features["dpp"] = true;
  135. Features["gfx8-insts"] = true;
  136. Features["gfx9-insts"] = true;
  137. Features["gfx10-insts"] = true;
  138. Features["s-memrealtime"] = true;
  139. break;
  140. case GK_GFX908:
  141. Features["dot3-insts"] = true;
  142. Features["dot4-insts"] = true;
  143. Features["dot5-insts"] = true;
  144. Features["dot6-insts"] = true;
  145. LLVM_FALLTHROUGH;
  146. case GK_GFX906:
  147. Features["dl-insts"] = true;
  148. Features["dot1-insts"] = true;
  149. Features["dot2-insts"] = true;
  150. LLVM_FALLTHROUGH;
  151. case GK_GFX909:
  152. case GK_GFX904:
  153. case GK_GFX902:
  154. case GK_GFX900:
  155. Features["gfx9-insts"] = true;
  156. LLVM_FALLTHROUGH;
  157. case GK_GFX810:
  158. case GK_GFX803:
  159. case GK_GFX802:
  160. case GK_GFX801:
  161. Features["gfx8-insts"] = true;
  162. Features["16-bit-insts"] = true;
  163. Features["dpp"] = true;
  164. Features["s-memrealtime"] = true;
  165. LLVM_FALLTHROUGH;
  166. case GK_GFX704:
  167. case GK_GFX703:
  168. case GK_GFX702:
  169. case GK_GFX701:
  170. case GK_GFX700:
  171. Features["ci-insts"] = true;
  172. Features["flat-address-space"] = true;
  173. LLVM_FALLTHROUGH;
  174. case GK_GFX601:
  175. case GK_GFX600:
  176. break;
  177. case GK_NONE:
  178. break;
  179. default:
  180. llvm_unreachable("Unhandled GPU!");
  181. }
  182. } else {
  183. if (CPU.empty())
  184. CPU = "r600";
  185. switch (llvm::AMDGPU::parseArchR600(CPU)) {
  186. case GK_CAYMAN:
  187. case GK_CYPRESS:
  188. case GK_RV770:
  189. case GK_RV670:
  190. // TODO: Add fp64 when implemented.
  191. break;
  192. case GK_TURKS:
  193. case GK_CAICOS:
  194. case GK_BARTS:
  195. case GK_SUMO:
  196. case GK_REDWOOD:
  197. case GK_JUNIPER:
  198. case GK_CEDAR:
  199. case GK_RV730:
  200. case GK_RV710:
  201. case GK_RS880:
  202. case GK_R630:
  203. case GK_R600:
  204. break;
  205. default:
  206. llvm_unreachable("Unhandled GPU!");
  207. }
  208. }
  209. return TargetInfo::initFeatureMap(Features, Diags, CPU, FeatureVec);
  210. }
  211. void AMDGPUTargetInfo::adjustTargetOptions(const CodeGenOptions &CGOpts,
  212. TargetOptions &TargetOpts) const {
  213. bool hasFP32Denormals = false;
  214. bool hasFP64Denormals = false;
  215. for (auto &I : TargetOpts.FeaturesAsWritten) {
  216. if (I == "+fp32-denormals" || I == "-fp32-denormals")
  217. hasFP32Denormals = true;
  218. if (I == "+fp64-fp16-denormals" || I == "-fp64-fp16-denormals")
  219. hasFP64Denormals = true;
  220. }
  221. if (!hasFP32Denormals)
  222. TargetOpts.Features.push_back(
  223. (Twine(hasFastFMAF() && hasFullRateDenormalsF32() && !CGOpts.FlushDenorm
  224. ? '+' : '-') + Twine("fp32-denormals"))
  225. .str());
  226. // Always do not flush fp64 or fp16 denorms.
  227. if (!hasFP64Denormals && hasFP64())
  228. TargetOpts.Features.push_back("+fp64-fp16-denormals");
  229. }
  230. void AMDGPUTargetInfo::fillValidCPUList(
  231. SmallVectorImpl<StringRef> &Values) const {
  232. if (isAMDGCN(getTriple()))
  233. llvm::AMDGPU::fillValidArchListAMDGCN(Values);
  234. else
  235. llvm::AMDGPU::fillValidArchListR600(Values);
  236. }
  237. void AMDGPUTargetInfo::setAddressSpaceMap(bool DefaultIsPrivate) {
  238. AddrSpaceMap = DefaultIsPrivate ? &AMDGPUDefIsPrivMap : &AMDGPUDefIsGenMap;
  239. }
  240. AMDGPUTargetInfo::AMDGPUTargetInfo(const llvm::Triple &Triple,
  241. const TargetOptions &Opts)
  242. : TargetInfo(Triple),
  243. GPUKind(isAMDGCN(Triple) ?
  244. llvm::AMDGPU::parseArchAMDGCN(Opts.CPU) :
  245. llvm::AMDGPU::parseArchR600(Opts.CPU)),
  246. GPUFeatures(isAMDGCN(Triple) ?
  247. llvm::AMDGPU::getArchAttrAMDGCN(GPUKind) :
  248. llvm::AMDGPU::getArchAttrR600(GPUKind)) {
  249. resetDataLayout(isAMDGCN(getTriple()) ? DataLayoutStringAMDGCN
  250. : DataLayoutStringR600);
  251. assert(DataLayout->getAllocaAddrSpace() == Private);
  252. setAddressSpaceMap(Triple.getOS() == llvm::Triple::Mesa3D ||
  253. !isAMDGCN(Triple));
  254. UseAddrSpaceMapMangling = true;
  255. HasLegalHalfType = true;
  256. HasFloat16 = true;
  257. // Set pointer width and alignment for target address space 0.
  258. PointerWidth = PointerAlign = DataLayout->getPointerSizeInBits();
  259. if (getMaxPointerWidth() == 64) {
  260. LongWidth = LongAlign = 64;
  261. SizeType = UnsignedLong;
  262. PtrDiffType = SignedLong;
  263. IntPtrType = SignedLong;
  264. }
  265. MaxAtomicPromoteWidth = MaxAtomicInlineWidth = 64;
  266. }
  267. void AMDGPUTargetInfo::adjust(LangOptions &Opts) {
  268. TargetInfo::adjust(Opts);
  269. // ToDo: There are still a few places using default address space as private
  270. // address space in OpenCL, which needs to be cleaned up, then Opts.OpenCL
  271. // can be removed from the following line.
  272. setAddressSpaceMap(/*DefaultIsPrivate=*/Opts.OpenCL ||
  273. !isAMDGCN(getTriple()));
  274. }
  275. ArrayRef<Builtin::Info> AMDGPUTargetInfo::getTargetBuiltins() const {
  276. return llvm::makeArrayRef(BuiltinInfo, clang::AMDGPU::LastTSBuiltin -
  277. Builtin::FirstTSBuiltin);
  278. }
  279. void AMDGPUTargetInfo::getTargetDefines(const LangOptions &Opts,
  280. MacroBuilder &Builder) const {
  281. Builder.defineMacro("__AMD__");
  282. Builder.defineMacro("__AMDGPU__");
  283. if (isAMDGCN(getTriple()))
  284. Builder.defineMacro("__AMDGCN__");
  285. else
  286. Builder.defineMacro("__R600__");
  287. if (GPUKind != llvm::AMDGPU::GK_NONE) {
  288. StringRef CanonName = isAMDGCN(getTriple()) ?
  289. getArchNameAMDGCN(GPUKind) : getArchNameR600(GPUKind);
  290. Builder.defineMacro(Twine("__") + Twine(CanonName) + Twine("__"));
  291. }
  292. // TODO: __HAS_FMAF__, __HAS_LDEXPF__, __HAS_FP64__ are deprecated and will be
  293. // removed in the near future.
  294. if (hasFMAF())
  295. Builder.defineMacro("__HAS_FMAF__");
  296. if (hasFastFMAF())
  297. Builder.defineMacro("FP_FAST_FMAF");
  298. if (hasLDEXPF())
  299. Builder.defineMacro("__HAS_LDEXPF__");
  300. if (hasFP64())
  301. Builder.defineMacro("__HAS_FP64__");
  302. if (hasFastFMA())
  303. Builder.defineMacro("FP_FAST_FMA");
  304. }
  305. void AMDGPUTargetInfo::setAuxTarget(const TargetInfo *Aux) {
  306. assert(HalfFormat == Aux->HalfFormat);
  307. assert(FloatFormat == Aux->FloatFormat);
  308. assert(DoubleFormat == Aux->DoubleFormat);
  309. // On x86_64 long double is 80-bit extended precision format, which is
  310. // not supported by AMDGPU. 128-bit floating point format is also not
  311. // supported by AMDGPU. Therefore keep its own format for these two types.
  312. auto SaveLongDoubleFormat = LongDoubleFormat;
  313. auto SaveFloat128Format = Float128Format;
  314. copyAuxTarget(Aux);
  315. LongDoubleFormat = SaveLongDoubleFormat;
  316. Float128Format = SaveFloat128Format;
  317. }