PPCTargetMachine.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532
  1. //===-- PPCTargetMachine.cpp - Define TargetMachine for PowerPC -----------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // Top-level implementation for the PowerPC target.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "PPCTargetMachine.h"
  13. #include "MCTargetDesc/PPCMCTargetDesc.h"
  14. #include "PPC.h"
  15. #include "PPCMachineScheduler.h"
  16. #include "PPCSubtarget.h"
  17. #include "PPCTargetObjectFile.h"
  18. #include "PPCTargetTransformInfo.h"
  19. #include "TargetInfo/PowerPCTargetInfo.h"
  20. #include "llvm/ADT/Optional.h"
  21. #include "llvm/ADT/STLExtras.h"
  22. #include "llvm/ADT/StringRef.h"
  23. #include "llvm/ADT/Triple.h"
  24. #include "llvm/Analysis/TargetTransformInfo.h"
  25. #include "llvm/CodeGen/Passes.h"
  26. #include "llvm/CodeGen/TargetPassConfig.h"
  27. #include "llvm/CodeGen/MachineScheduler.h"
  28. #include "llvm/IR/Attributes.h"
  29. #include "llvm/IR/DataLayout.h"
  30. #include "llvm/IR/Function.h"
  31. #include "llvm/Pass.h"
  32. #include "llvm/Support/CodeGen.h"
  33. #include "llvm/Support/CommandLine.h"
  34. #include "llvm/Support/TargetRegistry.h"
  35. #include "llvm/Target/TargetLoweringObjectFile.h"
  36. #include "llvm/Target/TargetOptions.h"
  37. #include "llvm/Transforms/Scalar.h"
  38. #include <cassert>
  39. #include <memory>
  40. #include <string>
  41. using namespace llvm;
  42. static cl::opt<bool>
  43. EnableBranchCoalescing("enable-ppc-branch-coalesce", cl::Hidden,
  44. cl::desc("enable coalescing of duplicate branches for PPC"));
  45. static cl::
  46. opt<bool> DisableCTRLoops("disable-ppc-ctrloops", cl::Hidden,
  47. cl::desc("Disable CTR loops for PPC"));
  48. static cl::
  49. opt<bool> DisablePreIncPrep("disable-ppc-preinc-prep", cl::Hidden,
  50. cl::desc("Disable PPC loop preinc prep"));
  51. static cl::opt<bool>
  52. VSXFMAMutateEarly("schedule-ppc-vsx-fma-mutation-early",
  53. cl::Hidden, cl::desc("Schedule VSX FMA instruction mutation early"));
  54. static cl::
  55. opt<bool> DisableVSXSwapRemoval("disable-ppc-vsx-swap-removal", cl::Hidden,
  56. cl::desc("Disable VSX Swap Removal for PPC"));
  57. static cl::
  58. opt<bool> DisableQPXLoadSplat("disable-ppc-qpx-load-splat", cl::Hidden,
  59. cl::desc("Disable QPX load splat simplification"));
  60. static cl::
  61. opt<bool> DisableMIPeephole("disable-ppc-peephole", cl::Hidden,
  62. cl::desc("Disable machine peepholes for PPC"));
  63. static cl::opt<bool>
  64. EnableGEPOpt("ppc-gep-opt", cl::Hidden,
  65. cl::desc("Enable optimizations on complex GEPs"),
  66. cl::init(true));
  67. static cl::opt<bool>
  68. EnablePrefetch("enable-ppc-prefetching",
  69. cl::desc("disable software prefetching on PPC"),
  70. cl::init(false), cl::Hidden);
  71. static cl::opt<bool>
  72. EnableExtraTOCRegDeps("enable-ppc-extra-toc-reg-deps",
  73. cl::desc("Add extra TOC register dependencies"),
  74. cl::init(true), cl::Hidden);
  75. static cl::opt<bool>
  76. EnableMachineCombinerPass("ppc-machine-combiner",
  77. cl::desc("Enable the machine combiner pass"),
  78. cl::init(true), cl::Hidden);
  79. static cl::opt<bool>
  80. ReduceCRLogical("ppc-reduce-cr-logicals",
  81. cl::desc("Expand eligible cr-logical binary ops to branches"),
  82. cl::init(false), cl::Hidden);
  83. extern "C" void LLVMInitializePowerPCTarget() {
  84. // Register the targets
  85. RegisterTargetMachine<PPCTargetMachine> A(getThePPC32Target());
  86. RegisterTargetMachine<PPCTargetMachine> B(getThePPC64Target());
  87. RegisterTargetMachine<PPCTargetMachine> C(getThePPC64LETarget());
  88. PassRegistry &PR = *PassRegistry::getPassRegistry();
  89. #ifndef NDEBUG
  90. initializePPCCTRLoopsVerifyPass(PR);
  91. #endif
  92. initializePPCLoopPreIncPrepPass(PR);
  93. initializePPCTOCRegDepsPass(PR);
  94. initializePPCEarlyReturnPass(PR);
  95. initializePPCVSXCopyPass(PR);
  96. initializePPCVSXFMAMutatePass(PR);
  97. initializePPCVSXSwapRemovalPass(PR);
  98. initializePPCReduceCRLogicalsPass(PR);
  99. initializePPCBSelPass(PR);
  100. initializePPCBranchCoalescingPass(PR);
  101. initializePPCQPXLoadSplatPass(PR);
  102. initializePPCBoolRetToIntPass(PR);
  103. initializePPCExpandISELPass(PR);
  104. initializePPCPreEmitPeepholePass(PR);
  105. initializePPCTLSDynamicCallPass(PR);
  106. initializePPCMIPeepholePass(PR);
  107. }
  108. /// Return the datalayout string of a subtarget.
  109. static std::string getDataLayoutString(const Triple &T) {
  110. bool is64Bit = T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le;
  111. std::string Ret;
  112. // Most PPC* platforms are big endian, PPC64LE is little endian.
  113. if (T.getArch() == Triple::ppc64le)
  114. Ret = "e";
  115. else
  116. Ret = "E";
  117. Ret += DataLayout::getManglingComponent(T);
  118. // PPC32 has 32 bit pointers. The PS3 (OS Lv2) is a PPC64 machine with 32 bit
  119. // pointers.
  120. if (!is64Bit || T.getOS() == Triple::Lv2)
  121. Ret += "-p:32:32";
  122. // Note, the alignment values for f64 and i64 on ppc64 in Darwin
  123. // documentation are wrong; these are correct (i.e. "what gcc does").
  124. if (is64Bit || !T.isOSDarwin())
  125. Ret += "-i64:64";
  126. else
  127. Ret += "-f64:32:64";
  128. // PPC64 has 32 and 64 bit registers, PPC32 has only 32 bit ones.
  129. if (is64Bit)
  130. Ret += "-n32:64";
  131. else
  132. Ret += "-n32";
  133. return Ret;
  134. }
  135. static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL,
  136. const Triple &TT) {
  137. std::string FullFS = FS;
  138. // Make sure 64-bit features are available when CPUname is generic
  139. if (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le) {
  140. if (!FullFS.empty())
  141. FullFS = "+64bit," + FullFS;
  142. else
  143. FullFS = "+64bit";
  144. }
  145. if (OL >= CodeGenOpt::Default) {
  146. if (!FullFS.empty())
  147. FullFS = "+crbits," + FullFS;
  148. else
  149. FullFS = "+crbits";
  150. }
  151. if (OL != CodeGenOpt::None) {
  152. if (!FullFS.empty())
  153. FullFS = "+invariant-function-descriptors," + FullFS;
  154. else
  155. FullFS = "+invariant-function-descriptors";
  156. }
  157. return FullFS;
  158. }
  159. static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
  160. if (TT.isOSDarwin())
  161. return llvm::make_unique<TargetLoweringObjectFileMachO>();
  162. if (TT.isOSAIX())
  163. return llvm::make_unique<TargetLoweringObjectFileXCOFF>();
  164. return llvm::make_unique<PPC64LinuxTargetObjectFile>();
  165. }
  166. static PPCTargetMachine::PPCABI computeTargetABI(const Triple &TT,
  167. const TargetOptions &Options) {
  168. if (TT.isOSDarwin())
  169. report_fatal_error("Darwin is no longer supported for PowerPC");
  170. if (Options.MCOptions.getABIName().startswith("elfv1"))
  171. return PPCTargetMachine::PPC_ABI_ELFv1;
  172. else if (Options.MCOptions.getABIName().startswith("elfv2"))
  173. return PPCTargetMachine::PPC_ABI_ELFv2;
  174. assert(Options.MCOptions.getABIName().empty() &&
  175. "Unknown target-abi option!");
  176. if (TT.isMacOSX())
  177. return PPCTargetMachine::PPC_ABI_UNKNOWN;
  178. switch (TT.getArch()) {
  179. case Triple::ppc64le:
  180. return PPCTargetMachine::PPC_ABI_ELFv2;
  181. case Triple::ppc64:
  182. if (TT.getEnvironment() == llvm::Triple::ELFv2)
  183. return PPCTargetMachine::PPC_ABI_ELFv2;
  184. return PPCTargetMachine::PPC_ABI_ELFv1;
  185. default:
  186. return PPCTargetMachine::PPC_ABI_UNKNOWN;
  187. }
  188. }
  189. static Reloc::Model getEffectiveRelocModel(const Triple &TT,
  190. Optional<Reloc::Model> RM) {
  191. if (RM.hasValue())
  192. return *RM;
  193. // Darwin defaults to dynamic-no-pic.
  194. if (TT.isOSDarwin())
  195. return Reloc::DynamicNoPIC;
  196. // Big Endian PPC is PIC by default.
  197. if (TT.getArch() == Triple::ppc64)
  198. return Reloc::PIC_;
  199. // Rest are static by default.
  200. return Reloc::Static;
  201. }
  202. static CodeModel::Model getEffectivePPCCodeModel(const Triple &TT,
  203. Optional<CodeModel::Model> CM,
  204. bool JIT) {
  205. if (CM) {
  206. if (*CM == CodeModel::Tiny)
  207. report_fatal_error("Target does not support the tiny CodeModel", false);
  208. if (*CM == CodeModel::Kernel)
  209. report_fatal_error("Target does not support the kernel CodeModel", false);
  210. return *CM;
  211. }
  212. if (!TT.isOSDarwin() && !JIT &&
  213. (TT.getArch() == Triple::ppc64 || TT.getArch() == Triple::ppc64le))
  214. return CodeModel::Medium;
  215. return CodeModel::Small;
  216. }
  217. static ScheduleDAGInstrs *createPPCMachineScheduler(MachineSchedContext *C) {
  218. const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>();
  219. ScheduleDAGMILive *DAG =
  220. new ScheduleDAGMILive(C, ST.usePPCPreRASchedStrategy() ?
  221. llvm::make_unique<PPCPreRASchedStrategy>(C) :
  222. llvm::make_unique<GenericScheduler>(C));
  223. // add DAG Mutations here.
  224. DAG->addMutation(createCopyConstrainDAGMutation(DAG->TII, DAG->TRI));
  225. return DAG;
  226. }
  227. static ScheduleDAGInstrs *createPPCPostMachineScheduler(
  228. MachineSchedContext *C) {
  229. const PPCSubtarget &ST = C->MF->getSubtarget<PPCSubtarget>();
  230. ScheduleDAGMI *DAG =
  231. new ScheduleDAGMI(C, ST.usePPCPostRASchedStrategy() ?
  232. llvm::make_unique<PPCPostRASchedStrategy>(C) :
  233. llvm::make_unique<PostGenericScheduler>(C), true);
  234. // add DAG Mutations here.
  235. return DAG;
  236. }
  237. // The FeatureString here is a little subtle. We are modifying the feature
  238. // string with what are (currently) non-function specific overrides as it goes
  239. // into the LLVMTargetMachine constructor and then using the stored value in the
  240. // Subtarget constructor below it.
  241. PPCTargetMachine::PPCTargetMachine(const Target &T, const Triple &TT,
  242. StringRef CPU, StringRef FS,
  243. const TargetOptions &Options,
  244. Optional<Reloc::Model> RM,
  245. Optional<CodeModel::Model> CM,
  246. CodeGenOpt::Level OL, bool JIT)
  247. : LLVMTargetMachine(T, getDataLayoutString(TT), TT, CPU,
  248. computeFSAdditions(FS, OL, TT), Options,
  249. getEffectiveRelocModel(TT, RM),
  250. getEffectivePPCCodeModel(TT, CM, JIT), OL),
  251. TLOF(createTLOF(getTargetTriple())),
  252. TargetABI(computeTargetABI(TT, Options)) {
  253. initAsmInfo();
  254. }
  255. PPCTargetMachine::~PPCTargetMachine() = default;
  256. const PPCSubtarget *
  257. PPCTargetMachine::getSubtargetImpl(const Function &F) const {
  258. Attribute CPUAttr = F.getFnAttribute("target-cpu");
  259. Attribute FSAttr = F.getFnAttribute("target-features");
  260. std::string CPU = !CPUAttr.hasAttribute(Attribute::None)
  261. ? CPUAttr.getValueAsString().str()
  262. : TargetCPU;
  263. std::string FS = !FSAttr.hasAttribute(Attribute::None)
  264. ? FSAttr.getValueAsString().str()
  265. : TargetFS;
  266. // FIXME: This is related to the code below to reset the target options,
  267. // we need to know whether or not the soft float flag is set on the
  268. // function before we can generate a subtarget. We also need to use
  269. // it as a key for the subtarget since that can be the only difference
  270. // between two functions.
  271. bool SoftFloat =
  272. F.getFnAttribute("use-soft-float").getValueAsString() == "true";
  273. // If the soft float attribute is set on the function turn on the soft float
  274. // subtarget feature.
  275. if (SoftFloat)
  276. FS += FS.empty() ? "-hard-float" : ",-hard-float";
  277. auto &I = SubtargetMap[CPU + FS];
  278. if (!I) {
  279. // This needs to be done before we create a new subtarget since any
  280. // creation will depend on the TM and the code generation flags on the
  281. // function that reside in TargetOptions.
  282. resetTargetOptions(F);
  283. I = llvm::make_unique<PPCSubtarget>(
  284. TargetTriple, CPU,
  285. // FIXME: It would be good to have the subtarget additions here
  286. // not necessary. Anything that turns them on/off (overrides) ends
  287. // up being put at the end of the feature string, but the defaults
  288. // shouldn't require adding them. Fixing this means pulling Feature64Bit
  289. // out of most of the target cpus in the .td file and making it set only
  290. // as part of initialization via the TargetTriple.
  291. computeFSAdditions(FS, getOptLevel(), getTargetTriple()), *this);
  292. }
  293. return I.get();
  294. }
  295. //===----------------------------------------------------------------------===//
  296. // Pass Pipeline Configuration
  297. //===----------------------------------------------------------------------===//
  298. namespace {
  299. /// PPC Code Generator Pass Configuration Options.
  300. class PPCPassConfig : public TargetPassConfig {
  301. public:
  302. PPCPassConfig(PPCTargetMachine &TM, PassManagerBase &PM)
  303. : TargetPassConfig(TM, PM) {
  304. // At any optimization level above -O0 we use the Machine Scheduler and not
  305. // the default Post RA List Scheduler.
  306. if (TM.getOptLevel() != CodeGenOpt::None)
  307. substitutePass(&PostRASchedulerID, &PostMachineSchedulerID);
  308. }
  309. PPCTargetMachine &getPPCTargetMachine() const {
  310. return getTM<PPCTargetMachine>();
  311. }
  312. void addIRPasses() override;
  313. bool addPreISel() override;
  314. bool addILPOpts() override;
  315. bool addInstSelector() override;
  316. void addMachineSSAOptimization() override;
  317. void addPreRegAlloc() override;
  318. void addPreSched2() override;
  319. void addPreEmitPass() override;
  320. ScheduleDAGInstrs *
  321. createMachineScheduler(MachineSchedContext *C) const override {
  322. return createPPCMachineScheduler(C);
  323. }
  324. ScheduleDAGInstrs *
  325. createPostMachineScheduler(MachineSchedContext *C) const override {
  326. return createPPCPostMachineScheduler(C);
  327. }
  328. };
  329. } // end anonymous namespace
  330. TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) {
  331. return new PPCPassConfig(*this, PM);
  332. }
  333. void PPCPassConfig::addIRPasses() {
  334. if (TM->getOptLevel() != CodeGenOpt::None)
  335. addPass(createPPCBoolRetToIntPass());
  336. addPass(createAtomicExpandPass());
  337. // For the BG/Q (or if explicitly requested), add explicit data prefetch
  338. // intrinsics.
  339. bool UsePrefetching = TM->getTargetTriple().getVendor() == Triple::BGQ &&
  340. getOptLevel() != CodeGenOpt::None;
  341. if (EnablePrefetch.getNumOccurrences() > 0)
  342. UsePrefetching = EnablePrefetch;
  343. if (UsePrefetching)
  344. addPass(createLoopDataPrefetchPass());
  345. if (TM->getOptLevel() >= CodeGenOpt::Default && EnableGEPOpt) {
  346. // Call SeparateConstOffsetFromGEP pass to extract constants within indices
  347. // and lower a GEP with multiple indices to either arithmetic operations or
  348. // multiple GEPs with single index.
  349. addPass(createSeparateConstOffsetFromGEPPass(true));
  350. // Call EarlyCSE pass to find and remove subexpressions in the lowered
  351. // result.
  352. addPass(createEarlyCSEPass());
  353. // Do loop invariant code motion in case part of the lowered result is
  354. // invariant.
  355. addPass(createLICMPass());
  356. }
  357. TargetPassConfig::addIRPasses();
  358. }
  359. bool PPCPassConfig::addPreISel() {
  360. if (!DisablePreIncPrep && getOptLevel() != CodeGenOpt::None)
  361. addPass(createPPCLoopPreIncPrepPass(getPPCTargetMachine()));
  362. if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
  363. addPass(createHardwareLoopsPass());
  364. return false;
  365. }
  366. bool PPCPassConfig::addILPOpts() {
  367. addPass(&EarlyIfConverterID);
  368. if (EnableMachineCombinerPass)
  369. addPass(&MachineCombinerID);
  370. return true;
  371. }
  372. bool PPCPassConfig::addInstSelector() {
  373. // Install an instruction selector.
  374. addPass(createPPCISelDag(getPPCTargetMachine(), getOptLevel()));
  375. #ifndef NDEBUG
  376. if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
  377. addPass(createPPCCTRLoopsVerify());
  378. #endif
  379. addPass(createPPCVSXCopyPass());
  380. return false;
  381. }
  382. void PPCPassConfig::addMachineSSAOptimization() {
  383. // PPCBranchCoalescingPass need to be done before machine sinking
  384. // since it merges empty blocks.
  385. if (EnableBranchCoalescing && getOptLevel() != CodeGenOpt::None)
  386. addPass(createPPCBranchCoalescingPass());
  387. TargetPassConfig::addMachineSSAOptimization();
  388. // For little endian, remove where possible the vector swap instructions
  389. // introduced at code generation to normalize vector element order.
  390. if (TM->getTargetTriple().getArch() == Triple::ppc64le &&
  391. !DisableVSXSwapRemoval)
  392. addPass(createPPCVSXSwapRemovalPass());
  393. // Reduce the number of cr-logical ops.
  394. if (ReduceCRLogical && getOptLevel() != CodeGenOpt::None)
  395. addPass(createPPCReduceCRLogicalsPass());
  396. // Target-specific peephole cleanups performed after instruction
  397. // selection.
  398. if (!DisableMIPeephole) {
  399. addPass(createPPCMIPeepholePass());
  400. addPass(&DeadMachineInstructionElimID);
  401. }
  402. }
  403. void PPCPassConfig::addPreRegAlloc() {
  404. if (getOptLevel() != CodeGenOpt::None) {
  405. initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
  406. insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
  407. &PPCVSXFMAMutateID);
  408. }
  409. // FIXME: We probably don't need to run these for -fPIE.
  410. if (getPPCTargetMachine().isPositionIndependent()) {
  411. // FIXME: LiveVariables should not be necessary here!
  412. // PPCTLSDynamicCallPass uses LiveIntervals which previously dependent on
  413. // LiveVariables. This (unnecessary) dependency has been removed now,
  414. // however a stage-2 clang build fails without LiveVariables computed here.
  415. addPass(&LiveVariablesID, false);
  416. addPass(createPPCTLSDynamicCallPass());
  417. }
  418. if (EnableExtraTOCRegDeps)
  419. addPass(createPPCTOCRegDepsPass());
  420. if (getOptLevel() != CodeGenOpt::None)
  421. addPass(&MachinePipelinerID);
  422. }
  423. void PPCPassConfig::addPreSched2() {
  424. if (getOptLevel() != CodeGenOpt::None) {
  425. addPass(&IfConverterID);
  426. // This optimization must happen after anything that might do store-to-load
  427. // forwarding. Here we're after RA (and, thus, when spills are inserted)
  428. // but before post-RA scheduling.
  429. if (!DisableQPXLoadSplat)
  430. addPass(createPPCQPXLoadSplatPass());
  431. }
  432. }
  433. void PPCPassConfig::addPreEmitPass() {
  434. addPass(createPPCPreEmitPeepholePass());
  435. addPass(createPPCExpandISELPass());
  436. if (getOptLevel() != CodeGenOpt::None)
  437. addPass(createPPCEarlyReturnPass(), false);
  438. // Must run branch selection immediately preceding the asm printer.
  439. addPass(createPPCBranchSelectionPass(), false);
  440. }
  441. TargetTransformInfo
  442. PPCTargetMachine::getTargetTransformInfo(const Function &F) {
  443. return TargetTransformInfo(PPCTTIImpl(this, F));
  444. }
  445. static MachineSchedRegistry
  446. PPCPreRASchedRegistry("ppc-prera",
  447. "Run PowerPC PreRA specific scheduler",
  448. createPPCMachineScheduler);
  449. static MachineSchedRegistry
  450. PPCPostRASchedRegistry("ppc-postra",
  451. "Run PowerPC PostRA specific scheduler",
  452. createPPCPostMachineScheduler);