HIP.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449
  1. //===--- HIP.cpp - HIP Tool and ToolChain Implementations -------*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "HIP.h"
  9. #include "CommonArgs.h"
  10. #include "InputInfo.h"
  11. #include "clang/Basic/Cuda.h"
  12. #include "clang/Driver/Compilation.h"
  13. #include "clang/Driver/Driver.h"
  14. #include "clang/Driver/DriverDiagnostic.h"
  15. #include "clang/Driver/Options.h"
  16. #include "llvm/Support/FileSystem.h"
  17. #include "llvm/Support/Path.h"
  18. using namespace clang::driver;
  19. using namespace clang::driver::toolchains;
  20. using namespace clang::driver::tools;
  21. using namespace clang;
  22. using namespace llvm::opt;
  23. #if defined(_WIN32) || defined(_WIN64)
  24. #define NULL_FILE "nul"
  25. #else
  26. #define NULL_FILE "/dev/null"
  27. #endif
  28. namespace {
  29. static void addBCLib(const Driver &D, const ArgList &Args,
  30. ArgStringList &CmdArgs, ArgStringList LibraryPaths,
  31. StringRef BCName) {
  32. StringRef FullName;
  33. for (std::string LibraryPath : LibraryPaths) {
  34. SmallString<128> Path(LibraryPath);
  35. llvm::sys::path::append(Path, BCName);
  36. FullName = Path;
  37. if (llvm::sys::fs::exists(FullName)) {
  38. CmdArgs.push_back("-mlink-builtin-bitcode");
  39. CmdArgs.push_back(Args.MakeArgString(FullName));
  40. return;
  41. }
  42. }
  43. D.Diag(diag::err_drv_no_such_file) << BCName;
  44. }
  45. static const char *getOutputFileName(Compilation &C, StringRef Base,
  46. const char *Postfix,
  47. const char *Extension) {
  48. const char *OutputFileName;
  49. if (C.getDriver().isSaveTempsEnabled()) {
  50. OutputFileName =
  51. C.getArgs().MakeArgString(Base.str() + Postfix + "." + Extension);
  52. } else {
  53. std::string TmpName =
  54. C.getDriver().GetTemporaryPath(Base.str() + Postfix, Extension);
  55. OutputFileName = C.addTempFile(C.getArgs().MakeArgString(TmpName));
  56. }
  57. return OutputFileName;
  58. }
  59. } // namespace
  60. const char *AMDGCN::Linker::constructLLVMLinkCommand(
  61. Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
  62. const ArgList &Args, StringRef SubArchName,
  63. StringRef OutputFilePrefix) const {
  64. ArgStringList CmdArgs;
  65. // Add the input bc's created by compile step.
  66. for (const auto &II : Inputs)
  67. CmdArgs.push_back(II.getFilename());
  68. // Add an intermediate output file.
  69. CmdArgs.push_back("-o");
  70. auto OutputFileName = getOutputFileName(C, OutputFilePrefix, "-linked", "bc");
  71. CmdArgs.push_back(OutputFileName);
  72. SmallString<128> ExecPath(C.getDriver().Dir);
  73. llvm::sys::path::append(ExecPath, "llvm-link");
  74. const char *Exec = Args.MakeArgString(ExecPath);
  75. C.addCommand(std::make_unique<Command>(JA, *this, Exec, CmdArgs, Inputs));
  76. return OutputFileName;
  77. }
  78. const char *AMDGCN::Linker::constructOptCommand(
  79. Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
  80. const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
  81. llvm::StringRef OutputFilePrefix, const char *InputFileName) const {
  82. // Construct opt command.
  83. ArgStringList OptArgs;
  84. // The input to opt is the output from llvm-link.
  85. OptArgs.push_back(InputFileName);
  86. // Pass optimization arg to opt.
  87. if (Arg *A = Args.getLastArg(options::OPT_O_Group)) {
  88. StringRef OOpt = "3";
  89. if (A->getOption().matches(options::OPT_O4) ||
  90. A->getOption().matches(options::OPT_Ofast))
  91. OOpt = "3";
  92. else if (A->getOption().matches(options::OPT_O0))
  93. OOpt = "0";
  94. else if (A->getOption().matches(options::OPT_O)) {
  95. // -Os, -Oz, and -O(anything else) map to -O2
  96. OOpt = llvm::StringSwitch<const char *>(A->getValue())
  97. .Case("1", "1")
  98. .Case("2", "2")
  99. .Case("3", "3")
  100. .Case("s", "2")
  101. .Case("z", "2")
  102. .Default("2");
  103. }
  104. OptArgs.push_back(Args.MakeArgString("-O" + OOpt));
  105. }
  106. OptArgs.push_back("-mtriple=amdgcn-amd-amdhsa");
  107. OptArgs.push_back(Args.MakeArgString("-mcpu=" + SubArchName));
  108. for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
  109. OptArgs.push_back(A->getValue(0));
  110. }
  111. OptArgs.push_back("-o");
  112. auto OutputFileName =
  113. getOutputFileName(C, OutputFilePrefix, "-optimized", "bc");
  114. OptArgs.push_back(OutputFileName);
  115. SmallString<128> OptPath(C.getDriver().Dir);
  116. llvm::sys::path::append(OptPath, "opt");
  117. const char *OptExec = Args.MakeArgString(OptPath);
  118. C.addCommand(std::make_unique<Command>(JA, *this, OptExec, OptArgs, Inputs));
  119. return OutputFileName;
  120. }
  121. const char *AMDGCN::Linker::constructLlcCommand(
  122. Compilation &C, const JobAction &JA, const InputInfoList &Inputs,
  123. const llvm::opt::ArgList &Args, llvm::StringRef SubArchName,
  124. llvm::StringRef OutputFilePrefix, const char *InputFileName,
  125. bool OutputIsAsm) const {
  126. // Construct llc command.
  127. ArgStringList LlcArgs{
  128. InputFileName, "-mtriple=amdgcn-amd-amdhsa",
  129. Args.MakeArgString(Twine("-filetype=") + (OutputIsAsm ? "asm" : "obj")),
  130. Args.MakeArgString("-mcpu=" + SubArchName)};
  131. // Extract all the -m options
  132. std::vector<llvm::StringRef> Features;
  133. handleTargetFeaturesGroup(
  134. Args, Features, options::OPT_m_amdgpu_Features_Group);
  135. // Add features to mattr such as xnack
  136. std::string MAttrString = "-mattr=";
  137. for(auto OneFeature : Features) {
  138. MAttrString.append(Args.MakeArgString(OneFeature));
  139. if (OneFeature != Features.back())
  140. MAttrString.append(",");
  141. }
  142. if(!Features.empty())
  143. LlcArgs.push_back(Args.MakeArgString(MAttrString));
  144. for (const Arg *A : Args.filtered(options::OPT_mllvm)) {
  145. LlcArgs.push_back(A->getValue(0));
  146. }
  147. // Add output filename
  148. LlcArgs.push_back("-o");
  149. auto LlcOutputFile =
  150. getOutputFileName(C, OutputFilePrefix, "", OutputIsAsm ? "s" : "o");
  151. LlcArgs.push_back(LlcOutputFile);
  152. SmallString<128> LlcPath(C.getDriver().Dir);
  153. llvm::sys::path::append(LlcPath, "llc");
  154. const char *Llc = Args.MakeArgString(LlcPath);
  155. C.addCommand(std::make_unique<Command>(JA, *this, Llc, LlcArgs, Inputs));
  156. return LlcOutputFile;
  157. }
  158. void AMDGCN::Linker::constructLldCommand(Compilation &C, const JobAction &JA,
  159. const InputInfoList &Inputs,
  160. const InputInfo &Output,
  161. const llvm::opt::ArgList &Args,
  162. const char *InputFileName) const {
  163. // Construct lld command.
  164. // The output from ld.lld is an HSA code object file.
  165. ArgStringList LldArgs{
  166. "-flavor", "gnu", "-shared", "-o", Output.getFilename(), InputFileName};
  167. SmallString<128> LldPath(C.getDriver().Dir);
  168. llvm::sys::path::append(LldPath, "lld");
  169. const char *Lld = Args.MakeArgString(LldPath);
  170. C.addCommand(std::make_unique<Command>(JA, *this, Lld, LldArgs, Inputs));
  171. }
  172. // Construct a clang-offload-bundler command to bundle code objects for
  173. // different GPU's into a HIP fat binary.
  174. void AMDGCN::constructHIPFatbinCommand(Compilation &C, const JobAction &JA,
  175. StringRef OutputFileName, const InputInfoList &Inputs,
  176. const llvm::opt::ArgList &Args, const Tool& T) {
  177. // Construct clang-offload-bundler command to bundle object files for
  178. // for different GPU archs.
  179. ArgStringList BundlerArgs;
  180. BundlerArgs.push_back(Args.MakeArgString("-type=o"));
  181. // ToDo: Remove the dummy host binary entry which is required by
  182. // clang-offload-bundler.
  183. std::string BundlerTargetArg = "-targets=host-x86_64-unknown-linux";
  184. std::string BundlerInputArg = "-inputs=" NULL_FILE;
  185. for (const auto &II : Inputs) {
  186. const auto* A = II.getAction();
  187. BundlerTargetArg = BundlerTargetArg + ",hip-amdgcn-amd-amdhsa-" +
  188. StringRef(A->getOffloadingArch()).str();
  189. BundlerInputArg = BundlerInputArg + "," + II.getFilename();
  190. }
  191. BundlerArgs.push_back(Args.MakeArgString(BundlerTargetArg));
  192. BundlerArgs.push_back(Args.MakeArgString(BundlerInputArg));
  193. auto BundlerOutputArg =
  194. Args.MakeArgString(std::string("-outputs=").append(OutputFileName));
  195. BundlerArgs.push_back(BundlerOutputArg);
  196. SmallString<128> BundlerPath(C.getDriver().Dir);
  197. llvm::sys::path::append(BundlerPath, "clang-offload-bundler");
  198. const char *Bundler = Args.MakeArgString(BundlerPath);
  199. C.addCommand(std::make_unique<Command>(JA, T, Bundler, BundlerArgs, Inputs));
  200. }
  201. // For amdgcn the inputs of the linker job are device bitcode and output is
  202. // object file. It calls llvm-link, opt, llc, then lld steps.
  203. void AMDGCN::Linker::ConstructJob(Compilation &C, const JobAction &JA,
  204. const InputInfo &Output,
  205. const InputInfoList &Inputs,
  206. const ArgList &Args,
  207. const char *LinkingOutput) const {
  208. if (JA.getType() == types::TY_HIP_FATBIN)
  209. return constructHIPFatbinCommand(C, JA, Output.getFilename(), Inputs, Args, *this);
  210. assert(getToolChain().getTriple().getArch() == llvm::Triple::amdgcn &&
  211. "Unsupported target");
  212. std::string SubArchName = JA.getOffloadingArch();
  213. assert(StringRef(SubArchName).startswith("gfx") && "Unsupported sub arch");
  214. // Prefix for temporary file name.
  215. std::string Prefix = llvm::sys::path::stem(Inputs[0].getFilename()).str();
  216. if (!C.getDriver().isSaveTempsEnabled())
  217. Prefix += "-" + SubArchName;
  218. // Each command outputs different files.
  219. const char *LLVMLinkCommand =
  220. constructLLVMLinkCommand(C, JA, Inputs, Args, SubArchName, Prefix);
  221. const char *OptCommand = constructOptCommand(C, JA, Inputs, Args, SubArchName,
  222. Prefix, LLVMLinkCommand);
  223. if (C.getDriver().isSaveTempsEnabled())
  224. constructLlcCommand(C, JA, Inputs, Args, SubArchName, Prefix, OptCommand,
  225. /*OutputIsAsm=*/true);
  226. const char *LlcCommand =
  227. constructLlcCommand(C, JA, Inputs, Args, SubArchName, Prefix, OptCommand);
  228. constructLldCommand(C, JA, Inputs, Output, Args, LlcCommand);
  229. }
  230. HIPToolChain::HIPToolChain(const Driver &D, const llvm::Triple &Triple,
  231. const ToolChain &HostTC, const ArgList &Args)
  232. : ToolChain(D, Triple, Args), HostTC(HostTC) {
  233. // Lookup binaries into the driver directory, this is used to
  234. // discover the clang-offload-bundler executable.
  235. getProgramPaths().push_back(getDriver().Dir);
  236. }
  237. void HIPToolChain::addClangTargetOptions(
  238. const llvm::opt::ArgList &DriverArgs,
  239. llvm::opt::ArgStringList &CC1Args,
  240. Action::OffloadKind DeviceOffloadingKind) const {
  241. HostTC.addClangTargetOptions(DriverArgs, CC1Args, DeviceOffloadingKind);
  242. StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_march_EQ);
  243. assert(!GpuArch.empty() && "Must have an explicit GPU arch.");
  244. (void) GpuArch;
  245. assert(DeviceOffloadingKind == Action::OFK_HIP &&
  246. "Only HIP offloading kinds are supported for GPUs.");
  247. CC1Args.push_back("-target-cpu");
  248. CC1Args.push_back(DriverArgs.MakeArgStringRef(GpuArch));
  249. CC1Args.push_back("-fcuda-is-device");
  250. if (DriverArgs.hasFlag(options::OPT_fcuda_flush_denormals_to_zero,
  251. options::OPT_fno_cuda_flush_denormals_to_zero, false))
  252. CC1Args.push_back("-fcuda-flush-denormals-to-zero");
  253. if (DriverArgs.hasFlag(options::OPT_fcuda_approx_transcendentals,
  254. options::OPT_fno_cuda_approx_transcendentals, false))
  255. CC1Args.push_back("-fcuda-approx-transcendentals");
  256. if (DriverArgs.hasFlag(options::OPT_fgpu_rdc, options::OPT_fno_gpu_rdc,
  257. false))
  258. CC1Args.push_back("-fgpu-rdc");
  259. // Default to "hidden" visibility, as object level linking will not be
  260. // supported for the foreseeable future.
  261. if (!DriverArgs.hasArg(options::OPT_fvisibility_EQ,
  262. options::OPT_fvisibility_ms_compat)) {
  263. CC1Args.append({"-fvisibility", "hidden"});
  264. CC1Args.push_back("-fapply-global-visibility-to-externs");
  265. }
  266. if (DriverArgs.hasArg(options::OPT_nogpulib))
  267. return;
  268. ArgStringList LibraryPaths;
  269. // Find in --hip-device-lib-path and HIP_LIBRARY_PATH.
  270. for (auto Path :
  271. DriverArgs.getAllArgValues(options::OPT_hip_device_lib_path_EQ))
  272. LibraryPaths.push_back(DriverArgs.MakeArgString(Path));
  273. addDirectoryList(DriverArgs, LibraryPaths, "-L", "HIP_DEVICE_LIB_PATH");
  274. llvm::SmallVector<std::string, 10> BCLibs;
  275. // Add bitcode library in --hip-device-lib.
  276. for (auto Lib : DriverArgs.getAllArgValues(options::OPT_hip_device_lib_EQ)) {
  277. BCLibs.push_back(DriverArgs.MakeArgString(Lib));
  278. }
  279. // If --hip-device-lib is not set, add the default bitcode libraries.
  280. if (BCLibs.empty()) {
  281. // Get the bc lib file name for ISA version. For example,
  282. // gfx803 => oclc_isa_version_803.amdgcn.bc.
  283. std::string GFXVersion = GpuArch.drop_front(3).str();
  284. std::string ISAVerBC = "oclc_isa_version_" + GFXVersion + ".amdgcn.bc";
  285. llvm::StringRef FlushDenormalControlBC;
  286. if (DriverArgs.hasArg(options::OPT_fcuda_flush_denormals_to_zero))
  287. FlushDenormalControlBC = "oclc_daz_opt_on.amdgcn.bc";
  288. else
  289. FlushDenormalControlBC = "oclc_daz_opt_off.amdgcn.bc";
  290. llvm::StringRef WaveFrontSizeBC;
  291. if (stoi(GFXVersion) < 1000)
  292. WaveFrontSizeBC = "oclc_wavefrontsize64_on.amdgcn.bc";
  293. else
  294. WaveFrontSizeBC = "oclc_wavefrontsize64_off.amdgcn.bc";
  295. BCLibs.append({"hip.amdgcn.bc", "opencl.amdgcn.bc", "ocml.amdgcn.bc",
  296. "ockl.amdgcn.bc", "oclc_finite_only_off.amdgcn.bc",
  297. FlushDenormalControlBC,
  298. "oclc_correctly_rounded_sqrt_on.amdgcn.bc",
  299. "oclc_unsafe_math_off.amdgcn.bc", ISAVerBC,
  300. WaveFrontSizeBC});
  301. }
  302. for (auto Lib : BCLibs)
  303. addBCLib(getDriver(), DriverArgs, CC1Args, LibraryPaths, Lib);
  304. }
  305. llvm::opt::DerivedArgList *
  306. HIPToolChain::TranslateArgs(const llvm::opt::DerivedArgList &Args,
  307. StringRef BoundArch,
  308. Action::OffloadKind DeviceOffloadKind) const {
  309. DerivedArgList *DAL =
  310. HostTC.TranslateArgs(Args, BoundArch, DeviceOffloadKind);
  311. if (!DAL)
  312. DAL = new DerivedArgList(Args.getBaseArgs());
  313. const OptTable &Opts = getDriver().getOpts();
  314. for (Arg *A : Args) {
  315. if (A->getOption().matches(options::OPT_Xarch__)) {
  316. // Skip this argument unless the architecture matches BoundArch.
  317. if (BoundArch.empty() || A->getValue(0) != BoundArch)
  318. continue;
  319. unsigned Index = Args.getBaseArgs().MakeIndex(A->getValue(1));
  320. unsigned Prev = Index;
  321. std::unique_ptr<Arg> XarchArg(Opts.ParseOneArg(Args, Index));
  322. // If the argument parsing failed or more than one argument was
  323. // consumed, the -Xarch_ argument's parameter tried to consume
  324. // extra arguments. Emit an error and ignore.
  325. //
  326. // We also want to disallow any options which would alter the
  327. // driver behavior; that isn't going to work in our model. We
  328. // use isDriverOption() as an approximation, although things
  329. // like -O4 are going to slip through.
  330. if (!XarchArg || Index > Prev + 1) {
  331. getDriver().Diag(diag::err_drv_invalid_Xarch_argument_with_args)
  332. << A->getAsString(Args);
  333. continue;
  334. } else if (XarchArg->getOption().hasFlag(options::DriverOption)) {
  335. getDriver().Diag(diag::err_drv_invalid_Xarch_argument_isdriver)
  336. << A->getAsString(Args);
  337. continue;
  338. }
  339. XarchArg->setBaseArg(A);
  340. A = XarchArg.release();
  341. DAL->AddSynthesizedArg(A);
  342. }
  343. DAL->append(A);
  344. }
  345. if (!BoundArch.empty()) {
  346. DAL->eraseArg(options::OPT_march_EQ);
  347. DAL->AddJoinedArg(nullptr, Opts.getOption(options::OPT_march_EQ), BoundArch);
  348. }
  349. return DAL;
  350. }
  351. Tool *HIPToolChain::buildLinker() const {
  352. assert(getTriple().getArch() == llvm::Triple::amdgcn);
  353. return new tools::AMDGCN::Linker(*this);
  354. }
  355. void HIPToolChain::addClangWarningOptions(ArgStringList &CC1Args) const {
  356. HostTC.addClangWarningOptions(CC1Args);
  357. }
  358. ToolChain::CXXStdlibType
  359. HIPToolChain::GetCXXStdlibType(const ArgList &Args) const {
  360. return HostTC.GetCXXStdlibType(Args);
  361. }
  362. void HIPToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs,
  363. ArgStringList &CC1Args) const {
  364. HostTC.AddClangSystemIncludeArgs(DriverArgs, CC1Args);
  365. }
  366. void HIPToolChain::AddClangCXXStdlibIncludeArgs(const ArgList &Args,
  367. ArgStringList &CC1Args) const {
  368. HostTC.AddClangCXXStdlibIncludeArgs(Args, CC1Args);
  369. }
  370. void HIPToolChain::AddIAMCUIncludeArgs(const ArgList &Args,
  371. ArgStringList &CC1Args) const {
  372. HostTC.AddIAMCUIncludeArgs(Args, CC1Args);
  373. }
  374. SanitizerMask HIPToolChain::getSupportedSanitizers() const {
  375. // The HIPToolChain only supports sanitizers in the sense that it allows
  376. // sanitizer arguments on the command line if they are supported by the host
  377. // toolchain. The HIPToolChain will actually ignore any command line
  378. // arguments for any of these "supported" sanitizers. That means that no
  379. // sanitization of device code is actually supported at this time.
  380. //
  381. // This behavior is necessary because the host and device toolchains
  382. // invocations often share the command line, so the device toolchain must
  383. // tolerate flags meant only for the host toolchain.
  384. return HostTC.getSupportedSanitizers();
  385. }
  386. VersionTuple HIPToolChain::computeMSVCVersion(const Driver *D,
  387. const ArgList &Args) const {
  388. return HostTC.computeMSVCVersion(D, Args);
  389. }