FunctionImport.cpp 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485
  1. //===- FunctionImport.cpp - ThinLTO Summary-based Function Import ---------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file implements Function import based on summaries.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/Transforms/IPO/FunctionImport.h"
  14. #include "llvm/ADT/SmallVector.h"
  15. #include "llvm/ADT/Statistic.h"
  16. #include "llvm/ADT/StringSet.h"
  17. #include "llvm/IR/AutoUpgrade.h"
  18. #include "llvm/IR/DiagnosticPrinter.h"
  19. #include "llvm/IR/IntrinsicInst.h"
  20. #include "llvm/IR/Module.h"
  21. #include "llvm/IRReader/IRReader.h"
  22. #include "llvm/Linker/Linker.h"
  23. #include "llvm/Object/ModuleSummaryIndexObjectFile.h"
  24. #include "llvm/Support/CommandLine.h"
  25. #include "llvm/Support/Debug.h"
  26. #include "llvm/Support/SourceMgr.h"
  27. #include "llvm/Transforms/Utils/FunctionImportUtils.h"
  28. #define DEBUG_TYPE "function-import"
  29. using namespace llvm;
  30. STATISTIC(NumImported, "Number of functions imported");
  31. /// Limit on instruction count of imported functions.
  32. static cl::opt<unsigned> ImportInstrLimit(
  33. "import-instr-limit", cl::init(100), cl::Hidden, cl::value_desc("N"),
  34. cl::desc("Only import functions with less than N instructions"));
  35. static cl::opt<float>
  36. ImportInstrFactor("import-instr-evolution-factor", cl::init(0.7),
  37. cl::Hidden, cl::value_desc("x"),
  38. cl::desc("As we import functions, multiply the "
  39. "`import-instr-limit` threshold by this factor "
  40. "before processing newly imported functions"));
  41. static cl::opt<bool> PrintImports("print-imports", cl::init(false), cl::Hidden,
  42. cl::desc("Print imported functions"));
  43. // Load lazily a module from \p FileName in \p Context.
  44. static std::unique_ptr<Module> loadFile(const std::string &FileName,
  45. LLVMContext &Context) {
  46. SMDiagnostic Err;
  47. DEBUG(dbgs() << "Loading '" << FileName << "'\n");
  48. // Metadata isn't loaded until functions are imported, to minimize
  49. // the memory overhead.
  50. std::unique_ptr<Module> Result =
  51. getLazyIRFileModule(FileName, Err, Context,
  52. /* ShouldLazyLoadMetadata = */ true);
  53. if (!Result) {
  54. Err.print("function-import", errs());
  55. return nullptr;
  56. }
  57. return Result;
  58. }
  59. namespace {
  60. /// Given a list of possible callee implementation for a call site, select one
  61. /// that fits the \p Threshold.
  62. ///
  63. /// FIXME: select "best" instead of first that fits. But what is "best"?
  64. /// - The smallest: more likely to be inlined.
  65. /// - The one with the least outgoing edges (already well optimized).
  66. /// - One from a module already being imported from in order to reduce the
  67. /// number of source modules parsed/linked.
  68. /// - One that has PGO data attached.
  69. /// - [insert you fancy metric here]
  70. static const FunctionSummary *
  71. selectCallee(const GlobalValueInfoList &CalleeInfoList, unsigned Threshold) {
  72. auto It = llvm::find_if(
  73. CalleeInfoList, [&](const std::unique_ptr<GlobalValueInfo> &GlobInfo) {
  74. assert(GlobInfo->summary() &&
  75. "We should not have a Global Info without summary");
  76. auto *Summary = cast<FunctionSummary>(GlobInfo->summary());
  77. if (GlobalValue::isWeakAnyLinkage(Summary->linkage()))
  78. return false;
  79. if (Summary->instCount() > Threshold)
  80. return false;
  81. return true;
  82. });
  83. if (It == CalleeInfoList.end())
  84. return nullptr;
  85. return cast<FunctionSummary>((*It)->summary());
  86. }
  87. /// Return the summary for the function \p GUID that fits the \p Threshold, or
  88. /// null if there's no match.
  89. static const FunctionSummary *selectCallee(uint64_t GUID, unsigned Threshold,
  90. const ModuleSummaryIndex &Index) {
  91. auto CalleeInfoList = Index.findGlobalValueInfoList(GUID);
  92. if (CalleeInfoList == Index.end()) {
  93. return nullptr; // This function does not have a summary
  94. }
  95. return selectCallee(CalleeInfoList->second, Threshold);
  96. }
  97. /// Return true if the global \p GUID is exported by module \p ExportModulePath.
  98. static bool isGlobalExported(const ModuleSummaryIndex &Index,
  99. StringRef ExportModulePath, uint64_t GUID) {
  100. auto CalleeInfoList = Index.findGlobalValueInfoList(GUID);
  101. if (CalleeInfoList == Index.end())
  102. // This global does not have a summary, it is not part of the ThinLTO
  103. // process
  104. return false;
  105. auto DefinedInCalleeModule = llvm::find_if(
  106. CalleeInfoList->second,
  107. [&](const std::unique_ptr<GlobalValueInfo> &GlobInfo) {
  108. auto *Summary = GlobInfo->summary();
  109. assert(Summary && "Unexpected GlobalValueInfo without summary");
  110. return Summary->modulePath() == ExportModulePath;
  111. });
  112. return (DefinedInCalleeModule != CalleeInfoList->second.end());
  113. }
  114. using EdgeInfo = std::pair<const FunctionSummary *, unsigned /* Threshold */>;
  115. /// Compute the list of functions to import for a given caller. Mark these
  116. /// imported functions and the symbols they reference in their source module as
  117. /// exported from their source module.
  118. static void computeImportForFunction(
  119. StringRef ModulePath, const FunctionSummary &Summary,
  120. const ModuleSummaryIndex &Index, unsigned Threshold,
  121. const std::map<uint64_t, FunctionSummary *> &DefinedFunctions,
  122. SmallVectorImpl<EdgeInfo> &Worklist,
  123. FunctionImporter::ImportMapTy &ImportsForModule,
  124. StringMap<FunctionImporter::ExportSetTy> &ExportLists) {
  125. for (auto &Edge : Summary.calls()) {
  126. auto GUID = Edge.first;
  127. DEBUG(dbgs() << " edge -> " << GUID << " Threshold:" << Threshold << "\n");
  128. if (DefinedFunctions.count(GUID)) {
  129. DEBUG(dbgs() << "ignored! Target already in destination module.\n");
  130. continue;
  131. }
  132. auto *CalleeSummary = selectCallee(GUID, Threshold, Index);
  133. if (!CalleeSummary) {
  134. DEBUG(dbgs() << "ignored! No qualifying callee with summary found.\n");
  135. continue;
  136. }
  137. assert(CalleeSummary->instCount() <= Threshold &&
  138. "selectCallee() didn't honor the threshold");
  139. auto &ProcessedThreshold =
  140. ImportsForModule[CalleeSummary->modulePath()][GUID];
  141. /// Since the traversal of the call graph is DFS, we can revisit a function
  142. /// a second time with a higher threshold. In this case, it is added back to
  143. /// the worklist with the new threshold.
  144. if (ProcessedThreshold && ProcessedThreshold > Threshold) {
  145. DEBUG(dbgs() << "ignored! Target was already seen with Threshold "
  146. << ProcessedThreshold << "\n");
  147. continue;
  148. }
  149. // Mark this function as imported in this module, with the current Threshold
  150. ProcessedThreshold = Threshold;
  151. // Make exports in the source module.
  152. auto ExportModulePath = CalleeSummary->modulePath();
  153. auto ExportList = ExportLists[ExportModulePath];
  154. ExportList.insert(GUID);
  155. // Mark all functions and globals referenced by this function as exported to
  156. // the outside if they are defined in the same source module.
  157. for (auto &Edge : CalleeSummary->calls()) {
  158. auto CalleeGUID = Edge.first;
  159. if (isGlobalExported(Index, ExportModulePath, CalleeGUID))
  160. ExportList.insert(CalleeGUID);
  161. }
  162. for (auto &GUID : CalleeSummary->refs()) {
  163. if (isGlobalExported(Index, ExportModulePath, GUID))
  164. ExportList.insert(GUID);
  165. }
  166. // Insert the newly imported function to the worklist.
  167. Worklist.push_back(std::make_pair(CalleeSummary, Threshold));
  168. }
  169. }
  170. /// Given the list of globals defined in a module, compute the list of imports
  171. /// as well as the list of "exports", i.e. the list of symbols referenced from
  172. /// another module (that may require promotion).
  173. static void ComputeImportForModule(
  174. StringRef ModulePath,
  175. const std::map<uint64_t, FunctionSummary *> &DefinedFunctions,
  176. const ModuleSummaryIndex &Index,
  177. FunctionImporter::ImportMapTy &ImportsForModule,
  178. StringMap<FunctionImporter::ExportSetTy> &ExportLists) {
  179. // Worklist contains the list of function imported in this module, for which
  180. // we will analyse the callees and may import further down the callgraph.
  181. SmallVector<EdgeInfo, 128> Worklist;
  182. // Populate the worklist with the import for the functions in the current
  183. // module
  184. for (auto &FuncInfo : DefinedFunctions) {
  185. auto *Summary = FuncInfo.second;
  186. DEBUG(dbgs() << "Initalize import for " << FuncInfo.first << "\n");
  187. computeImportForFunction(ModulePath, *Summary, Index, ImportInstrLimit,
  188. DefinedFunctions, Worklist, ImportsForModule,
  189. ExportLists);
  190. }
  191. while (!Worklist.empty()) {
  192. auto FuncInfo = Worklist.pop_back_val();
  193. auto *Summary = FuncInfo.first;
  194. auto Threshold = FuncInfo.second;
  195. // Process the newly imported functions and add callees to the worklist.
  196. // Adjust the threshold
  197. Threshold = Threshold * ImportInstrFactor;
  198. computeImportForFunction(ModulePath, *Summary, Index, Threshold,
  199. DefinedFunctions, Worklist, ImportsForModule,
  200. ExportLists);
  201. }
  202. }
  203. } // anonymous namespace
  204. /// Compute all the import and export for every module in the Index.
  205. void llvm::ComputeCrossModuleImport(
  206. const ModuleSummaryIndex &Index,
  207. StringMap<FunctionImporter::ImportMapTy> &ImportLists,
  208. StringMap<FunctionImporter::ExportSetTy> &ExportLists) {
  209. auto ModuleCount = Index.modulePaths().size();
  210. // Collect for each module the list of function it defines.
  211. // GUID -> Summary
  212. StringMap<std::map<uint64_t, FunctionSummary *>> Module2FunctionInfoMap(
  213. ModuleCount);
  214. for (auto &GlobalList : Index) {
  215. auto GUID = GlobalList.first;
  216. for (auto &GlobInfo : GlobalList.second) {
  217. auto *Summary = dyn_cast_or_null<FunctionSummary>(GlobInfo->summary());
  218. if (!Summary)
  219. /// Ignore global variable, focus on functions
  220. continue;
  221. DEBUG(dbgs() << "Adding definition: Module '" << Summary->modulePath()
  222. << "' defines '" << GUID << "'\n");
  223. Module2FunctionInfoMap[Summary->modulePath()][GUID] = Summary;
  224. }
  225. }
  226. // For each module that has function defined, compute the import/export lists.
  227. for (auto &DefinedFunctions : Module2FunctionInfoMap) {
  228. auto &ImportsForModule = ImportLists[DefinedFunctions.first()];
  229. DEBUG(dbgs() << "Computing import for Module '" << DefinedFunctions.first()
  230. << "'\n");
  231. ComputeImportForModule(DefinedFunctions.first(), DefinedFunctions.second,
  232. Index, ImportsForModule, ExportLists);
  233. }
  234. #ifndef NDEBUG
  235. DEBUG(dbgs() << "Import/Export lists for " << ImportLists.size()
  236. << " modules:\n");
  237. for (auto &ModuleImports : ImportLists) {
  238. auto ModName = ModuleImports.first();
  239. auto &Exports = ExportLists[ModName];
  240. DEBUG(dbgs() << "* Module " << ModName << " exports " << Exports.size()
  241. << " functions. Imports from " << ModuleImports.second.size()
  242. << " modules.\n");
  243. for (auto &Src : ModuleImports.second) {
  244. auto SrcModName = Src.first();
  245. DEBUG(dbgs() << " - " << Src.second.size() << " functions imported from "
  246. << SrcModName << "\n");
  247. }
  248. }
  249. #endif
  250. }
  251. // Automatically import functions in Module \p DestModule based on the summaries
  252. // index.
  253. //
  254. bool FunctionImporter::importFunctions(
  255. Module &DestModule, const FunctionImporter::ImportMapTy &ImportList) {
  256. DEBUG(dbgs() << "Starting import for Module "
  257. << DestModule.getModuleIdentifier() << "\n");
  258. unsigned ImportedCount = 0;
  259. // Linker that will be used for importing function
  260. Linker TheLinker(DestModule);
  261. // Do the actual import of functions now, one Module at a time
  262. std::set<StringRef> ModuleNameOrderedList;
  263. for (auto &FunctionsToImportPerModule : ImportList) {
  264. ModuleNameOrderedList.insert(FunctionsToImportPerModule.first());
  265. }
  266. for (auto &Name : ModuleNameOrderedList) {
  267. // Get the module for the import
  268. const auto &FunctionsToImportPerModule = ImportList.find(Name);
  269. assert(FunctionsToImportPerModule != ImportList.end());
  270. std::unique_ptr<Module> SrcModule = ModuleLoader(Name);
  271. assert(&DestModule.getContext() == &SrcModule->getContext() &&
  272. "Context mismatch");
  273. // If modules were created with lazy metadata loading, materialize it
  274. // now, before linking it (otherwise this will be a noop).
  275. SrcModule->materializeMetadata();
  276. UpgradeDebugInfo(*SrcModule);
  277. auto &ImportGUIDs = FunctionsToImportPerModule->second;
  278. // Find the globals to import
  279. DenseSet<const GlobalValue *> GlobalsToImport;
  280. for (auto &GV : *SrcModule) {
  281. if (GV.hasName() && ImportGUIDs.count(GV.getGUID())) {
  282. GV.materialize();
  283. GlobalsToImport.insert(&GV);
  284. }
  285. }
  286. for (auto &GV : SrcModule->aliases()) {
  287. if (!GV.hasName())
  288. continue;
  289. auto GUID = GV.getGUID();
  290. if (ImportGUIDs.count(GUID)) {
  291. // Alias can't point to "available_externally". However when we import
  292. // linkOnceODR the linkage does not change. So we import the alias
  293. // and aliasee only in this case.
  294. const GlobalObject *GO = GV.getBaseObject();
  295. if (!GO->hasLinkOnceODRLinkage())
  296. continue;
  297. GV.materialize();
  298. GlobalsToImport.insert(&GV);
  299. GlobalsToImport.insert(GO);
  300. }
  301. }
  302. for (auto &GV : SrcModule->globals()) {
  303. if (!GV.hasName())
  304. continue;
  305. auto GUID = GV.getGUID();
  306. if (ImportGUIDs.count(GUID)) {
  307. GV.materialize();
  308. GlobalsToImport.insert(&GV);
  309. }
  310. }
  311. // Link in the specified functions.
  312. if (renameModuleForThinLTO(*SrcModule, Index, &GlobalsToImport))
  313. return true;
  314. if (PrintImports) {
  315. for (const auto *GV : GlobalsToImport)
  316. dbgs() << DestModule.getSourceFileName() << ": Import " << GV->getName()
  317. << " from " << SrcModule->getSourceFileName() << "\n";
  318. }
  319. if (TheLinker.linkInModule(std::move(SrcModule), Linker::Flags::None,
  320. &GlobalsToImport))
  321. report_fatal_error("Function Import: link error");
  322. ImportedCount += GlobalsToImport.size();
  323. }
  324. NumImported += ImportedCount;
  325. DEBUG(dbgs() << "Imported " << ImportedCount << " functions for Module "
  326. << DestModule.getModuleIdentifier() << "\n");
  327. return ImportedCount;
  328. }
  329. /// Summary file to use for function importing when using -function-import from
  330. /// the command line.
  331. static cl::opt<std::string>
  332. SummaryFile("summary-file",
  333. cl::desc("The summary file to use for function importing."));
  334. static void diagnosticHandler(const DiagnosticInfo &DI) {
  335. raw_ostream &OS = errs();
  336. DiagnosticPrinterRawOStream DP(OS);
  337. DI.print(DP);
  338. OS << '\n';
  339. }
  340. /// Parse the summary index out of an IR file and return the summary
  341. /// index object if found, or nullptr if not.
  342. static std::unique_ptr<ModuleSummaryIndex>
  343. getModuleSummaryIndexForFile(StringRef Path, std::string &Error,
  344. DiagnosticHandlerFunction DiagnosticHandler) {
  345. std::unique_ptr<MemoryBuffer> Buffer;
  346. ErrorOr<std::unique_ptr<MemoryBuffer>> BufferOrErr =
  347. MemoryBuffer::getFile(Path);
  348. if (std::error_code EC = BufferOrErr.getError()) {
  349. Error = EC.message();
  350. return nullptr;
  351. }
  352. Buffer = std::move(BufferOrErr.get());
  353. ErrorOr<std::unique_ptr<object::ModuleSummaryIndexObjectFile>> ObjOrErr =
  354. object::ModuleSummaryIndexObjectFile::create(Buffer->getMemBufferRef(),
  355. DiagnosticHandler);
  356. if (std::error_code EC = ObjOrErr.getError()) {
  357. Error = EC.message();
  358. return nullptr;
  359. }
  360. return (*ObjOrErr)->takeIndex();
  361. }
  362. namespace {
  363. /// Pass that performs cross-module function import provided a summary file.
  364. class FunctionImportPass : public ModulePass {
  365. /// Optional module summary index to use for importing, otherwise
  366. /// the summary-file option must be specified.
  367. const ModuleSummaryIndex *Index;
  368. public:
  369. /// Pass identification, replacement for typeid
  370. static char ID;
  371. /// Specify pass name for debug output
  372. const char *getPassName() const override {
  373. return "Function Importing";
  374. }
  375. explicit FunctionImportPass(const ModuleSummaryIndex *Index = nullptr)
  376. : ModulePass(ID), Index(Index) {}
  377. bool runOnModule(Module &M) override {
  378. if (SummaryFile.empty() && !Index)
  379. report_fatal_error("error: -function-import requires -summary-file or "
  380. "file from frontend\n");
  381. std::unique_ptr<ModuleSummaryIndex> IndexPtr;
  382. if (!SummaryFile.empty()) {
  383. if (Index)
  384. report_fatal_error("error: -summary-file and index from frontend\n");
  385. std::string Error;
  386. IndexPtr =
  387. getModuleSummaryIndexForFile(SummaryFile, Error, diagnosticHandler);
  388. if (!IndexPtr) {
  389. errs() << "Error loading file '" << SummaryFile << "': " << Error
  390. << "\n";
  391. return false;
  392. }
  393. Index = IndexPtr.get();
  394. }
  395. // First step is collecting the import/export lists
  396. // The export list is not used yet, but could limit the amount of renaming
  397. // performed in renameModuleForThinLTO()
  398. StringMap<FunctionImporter::ImportMapTy> ImportLists;
  399. StringMap<FunctionImporter::ExportSetTy> ExportLists;
  400. ComputeCrossModuleImport(*Index, ImportLists, ExportLists);
  401. auto &ImportList = ImportLists[M.getModuleIdentifier()];
  402. // Next we need to promote to global scope and rename any local values that
  403. // are potentially exported to other modules.
  404. if (renameModuleForThinLTO(M, *Index, nullptr)) {
  405. errs() << "Error renaming module\n";
  406. return false;
  407. }
  408. // Perform the import now.
  409. auto ModuleLoader = [&M](StringRef Identifier) {
  410. return loadFile(Identifier, M.getContext());
  411. };
  412. FunctionImporter Importer(*Index, ModuleLoader);
  413. return Importer.importFunctions(M, ImportList);
  414. }
  415. };
  416. } // anonymous namespace
  417. char FunctionImportPass::ID = 0;
  418. INITIALIZE_PASS_BEGIN(FunctionImportPass, "function-import",
  419. "Summary Based Function Import", false, false)
  420. INITIALIZE_PASS_END(FunctionImportPass, "function-import",
  421. "Summary Based Function Import", false, false)
  422. namespace llvm {
  423. Pass *createFunctionImportPass(const ModuleSummaryIndex *Index = nullptr) {
  424. return new FunctionImportPass(Index);
  425. }
  426. }