// NOTE: extraction artifacts (file-size banner and concatenated line-number
// runs) removed; the translation unit begins with the license header below.
  1. //===- ModuleSummaryAnalysis.cpp - Module summary index builder -----------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass builds a ModuleSummaryIndex object for the module, to be written
  10. // to bitcode or LLVM assembly.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/Analysis/ModuleSummaryAnalysis.h"
  14. #include "llvm/ADT/ArrayRef.h"
  15. #include "llvm/ADT/DenseSet.h"
  16. #include "llvm/ADT/MapVector.h"
  17. #include "llvm/ADT/STLExtras.h"
  18. #include "llvm/ADT/SetVector.h"
  19. #include "llvm/ADT/SmallPtrSet.h"
  20. #include "llvm/ADT/SmallVector.h"
  21. #include "llvm/ADT/StringRef.h"
  22. #include "llvm/Analysis/BlockFrequencyInfo.h"
  23. #include "llvm/Analysis/BranchProbabilityInfo.h"
  24. #include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
  25. #include "llvm/Analysis/LoopInfo.h"
  26. #include "llvm/Analysis/ProfileSummaryInfo.h"
  27. #include "llvm/Analysis/TypeMetadataUtils.h"
  28. #include "llvm/IR/Attributes.h"
  29. #include "llvm/IR/BasicBlock.h"
  30. #include "llvm/IR/CallSite.h"
  31. #include "llvm/IR/Constant.h"
  32. #include "llvm/IR/Constants.h"
  33. #include "llvm/IR/Dominators.h"
  34. #include "llvm/IR/Function.h"
  35. #include "llvm/IR/GlobalAlias.h"
  36. #include "llvm/IR/GlobalValue.h"
  37. #include "llvm/IR/GlobalVariable.h"
  38. #include "llvm/IR/Instructions.h"
  39. #include "llvm/IR/IntrinsicInst.h"
  40. #include "llvm/IR/Intrinsics.h"
  41. #include "llvm/IR/Metadata.h"
  42. #include "llvm/IR/Module.h"
  43. #include "llvm/IR/ModuleSummaryIndex.h"
  44. #include "llvm/IR/Use.h"
  45. #include "llvm/IR/User.h"
  46. #include "llvm/Object/ModuleSymbolTable.h"
  47. #include "llvm/Object/SymbolicFile.h"
  48. #include "llvm/Pass.h"
  49. #include "llvm/Support/Casting.h"
  50. #include "llvm/Support/CommandLine.h"
  51. #include <algorithm>
  52. #include <cassert>
  53. #include <cstdint>
  54. #include <vector>
  55. using namespace llvm;
  56. #define DEBUG_TYPE "module-summary-analysis"
// Option to force edges cold which will block importing when the
// -import-cold-multiplier is set to 0. Useful for debugging.
// The variable is exposed via cl::location so the FSEC flag below writes
// directly into it; computeFunctionSummary reads it when assigning hotness.
FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold =
    FunctionSummary::FSHT_None;
cl::opt<FunctionSummary::ForceSummaryHotnessType, true> FSEC(
    "force-summary-edges-cold", cl::Hidden, cl::location(ForceSummaryEdgesCold),
    cl::desc("Force all edges in the function summary to cold"),
    cl::values(clEnumValN(FunctionSummary::FSHT_None, "none", "None."),
               clEnumValN(FunctionSummary::FSHT_AllNonCritical,
                          "all-non-critical", "All non-critical edges."),
               clEnumValN(FunctionSummary::FSHT_All, "all", "All edges.")));

// Debugging aid: when non-empty, a dot graph of the computed summary is
// emitted to the named file.
cl::opt<std::string> ModuleSummaryDotFile(
    "module-summary-dot-file", cl::init(""), cl::Hidden,
    cl::value_desc("filename"),
    cl::desc("File to emit dot graph of new summary into."));
  72. // Walk through the operands of a given User via worklist iteration and populate
  73. // the set of GlobalValue references encountered. Invoked either on an
  74. // Instruction or a GlobalVariable (which walks its initializer).
  75. // Return true if any of the operands contains blockaddress. This is important
  76. // to know when computing summary for global var, because if global variable
  77. // references basic block address we can't import it separately from function
  78. // containing that basic block. For simplicity we currently don't import such
  79. // global vars at all. When importing function we aren't interested if any
  80. // instruction in it takes an address of any basic block, because instruction
  81. // can only take an address of basic block located in the same function.
  82. static bool findRefEdges(ModuleSummaryIndex &Index, const User *CurUser,
  83. SetVector<ValueInfo> &RefEdges,
  84. SmallPtrSet<const User *, 8> &Visited) {
  85. bool HasBlockAddress = false;
  86. SmallVector<const User *, 32> Worklist;
  87. Worklist.push_back(CurUser);
  88. while (!Worklist.empty()) {
  89. const User *U = Worklist.pop_back_val();
  90. if (!Visited.insert(U).second)
  91. continue;
  92. ImmutableCallSite CS(U);
  93. for (const auto &OI : U->operands()) {
  94. const User *Operand = dyn_cast<User>(OI);
  95. if (!Operand)
  96. continue;
  97. if (isa<BlockAddress>(Operand)) {
  98. HasBlockAddress = true;
  99. continue;
  100. }
  101. if (auto *GV = dyn_cast<GlobalValue>(Operand)) {
  102. // We have a reference to a global value. This should be added to
  103. // the reference set unless it is a callee. Callees are handled
  104. // specially by WriteFunction and are added to a separate list.
  105. if (!(CS && CS.isCallee(&OI)))
  106. RefEdges.insert(Index.getOrInsertValueInfo(GV));
  107. continue;
  108. }
  109. Worklist.push_back(Operand);
  110. }
  111. }
  112. return HasBlockAddress;
  113. }
  114. static CalleeInfo::HotnessType getHotness(uint64_t ProfileCount,
  115. ProfileSummaryInfo *PSI) {
  116. if (!PSI)
  117. return CalleeInfo::HotnessType::Unknown;
  118. if (PSI->isHotCount(ProfileCount))
  119. return CalleeInfo::HotnessType::Hot;
  120. if (PSI->isColdCount(ProfileCount))
  121. return CalleeInfo::HotnessType::Cold;
  122. return CalleeInfo::HotnessType::None;
  123. }
  124. static bool isNonRenamableLocal(const GlobalValue &GV) {
  125. return GV.hasSection() && GV.hasLocalLinkage();
  126. }
  127. /// Determine whether this call has all constant integer arguments (excluding
  128. /// "this") and summarize it to VCalls or ConstVCalls as appropriate.
  129. static void addVCallToSet(DevirtCallSite Call, GlobalValue::GUID Guid,
  130. SetVector<FunctionSummary::VFuncId> &VCalls,
  131. SetVector<FunctionSummary::ConstVCall> &ConstVCalls) {
  132. std::vector<uint64_t> Args;
  133. // Start from the second argument to skip the "this" pointer.
  134. for (auto &Arg : make_range(Call.CS.arg_begin() + 1, Call.CS.arg_end())) {
  135. auto *CI = dyn_cast<ConstantInt>(Arg);
  136. if (!CI || CI->getBitWidth() > 64) {
  137. VCalls.insert({Guid, Call.Offset});
  138. return;
  139. }
  140. Args.push_back(CI->getZExtValue());
  141. }
  142. ConstVCalls.insert({{Guid, Call.Offset}, std::move(Args)});
  143. }
/// If this intrinsic call requires that we add information to the function
/// summary, do so via the non-constant reference arguments.
///
/// Only llvm.type.test and llvm.type.checked.load are summarized (they carry
/// the type metadata consumed by type-test lowering and whole-program
/// devirtualization); all other intrinsics fall through to the default case.
static void addIntrinsicToSummary(
    const CallInst *CI, SetVector<GlobalValue::GUID> &TypeTests,
    SetVector<FunctionSummary::VFuncId> &TypeTestAssumeVCalls,
    SetVector<FunctionSummary::VFuncId> &TypeCheckedLoadVCalls,
    SetVector<FunctionSummary::ConstVCall> &TypeTestAssumeConstVCalls,
    SetVector<FunctionSummary::ConstVCall> &TypeCheckedLoadConstVCalls,
    DominatorTree &DT) {
  switch (CI->getCalledFunction()->getIntrinsicID()) {
  case Intrinsic::type_test: {
    // Operand 1 of llvm.type.test holds the type identifier metadata.
    auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
    auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
    // Non-string type identifiers are not summarized.
    if (!TypeId)
      break;
    GlobalValue::GUID Guid = GlobalValue::getGUID(TypeId->getString());

    // Produce a summary from type.test intrinsics. We only summarize type.test
    // intrinsics that are used other than by an llvm.assume intrinsic.
    // Intrinsics that are assumed are relevant only to the devirtualization
    // pass, not the type test lowering pass.
    bool HasNonAssumeUses = llvm::any_of(CI->uses(), [](const Use &CIU) {
      auto *AssumeCI = dyn_cast<CallInst>(CIU.getUser());
      if (!AssumeCI)
        return true;
      Function *F = AssumeCI->getCalledFunction();
      return !F || F->getIntrinsicID() != Intrinsic::assume;
    });
    if (HasNonAssumeUses)
      TypeTests.insert(Guid);

    // Record any virtual calls guarded by this type test so they can be
    // considered for index-based devirtualization.
    SmallVector<DevirtCallSite, 4> DevirtCalls;
    SmallVector<CallInst *, 4> Assumes;
    findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
    for (auto &Call : DevirtCalls)
      addVCallToSet(Call, Guid, TypeTestAssumeVCalls,
                    TypeTestAssumeConstVCalls);

    break;
  }

  case Intrinsic::type_checked_load: {
    // Operand 2 of llvm.type.checked.load holds the type identifier metadata.
    auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(2));
    auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
    if (!TypeId)
      break;
    GlobalValue::GUID Guid = GlobalValue::getGUID(TypeId->getString());

    SmallVector<DevirtCallSite, 4> DevirtCalls;
    SmallVector<Instruction *, 4> LoadedPtrs;
    SmallVector<Instruction *, 4> Preds;
    bool HasNonCallUses = false;
    findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
                                               HasNonCallUses, CI, DT);
    // Any non-call uses of the result of llvm.type.checked.load will
    // prevent us from optimizing away the llvm.type.test.
    if (HasNonCallUses)
      TypeTests.insert(Guid);
    for (auto &Call : DevirtCalls)
      addVCallToSet(Call, Guid, TypeCheckedLoadVCalls,
                    TypeCheckedLoadConstVCalls);

    break;
  }
  default:
    break;
  }
}
  206. static bool isNonVolatileLoad(const Instruction *I) {
  207. if (const auto *LI = dyn_cast<LoadInst>(I))
  208. return !LI->isVolatile();
  209. return false;
  210. }
  211. static bool isNonVolatileStore(const Instruction *I) {
  212. if (const auto *SI = dyn_cast<StoreInst>(I))
  213. return !SI->isVolatile();
  214. return false;
  215. }
  216. static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
  217. const Function &F, BlockFrequencyInfo *BFI,
  218. ProfileSummaryInfo *PSI, DominatorTree &DT,
  219. bool HasLocalsInUsedOrAsm,
  220. DenseSet<GlobalValue::GUID> &CantBePromoted,
  221. bool IsThinLTO) {
  222. // Summary not currently supported for anonymous functions, they should
  223. // have been named.
  224. assert(F.hasName());
  225. unsigned NumInsts = 0;
  226. // Map from callee ValueId to profile count. Used to accumulate profile
  227. // counts for all static calls to a given callee.
  228. MapVector<ValueInfo, CalleeInfo> CallGraphEdges;
  229. SetVector<ValueInfo> RefEdges, LoadRefEdges, StoreRefEdges;
  230. SetVector<GlobalValue::GUID> TypeTests;
  231. SetVector<FunctionSummary::VFuncId> TypeTestAssumeVCalls,
  232. TypeCheckedLoadVCalls;
  233. SetVector<FunctionSummary::ConstVCall> TypeTestAssumeConstVCalls,
  234. TypeCheckedLoadConstVCalls;
  235. ICallPromotionAnalysis ICallAnalysis;
  236. SmallPtrSet<const User *, 8> Visited;
  237. // Add personality function, prefix data and prologue data to function's ref
  238. // list.
  239. findRefEdges(Index, &F, RefEdges, Visited);
  240. std::vector<const Instruction *> NonVolatileLoads;
  241. std::vector<const Instruction *> NonVolatileStores;
  242. bool HasInlineAsmMaybeReferencingInternal = false;
  243. for (const BasicBlock &BB : F)
  244. for (const Instruction &I : BB) {
  245. if (isa<DbgInfoIntrinsic>(I))
  246. continue;
  247. ++NumInsts;
  248. // Regular LTO module doesn't participate in ThinLTO import,
  249. // so no reference from it can be read/writeonly, since this
  250. // would require importing variable as local copy
  251. if (IsThinLTO) {
  252. if (isNonVolatileLoad(&I)) {
  253. // Postpone processing of non-volatile load instructions
  254. // See comments below
  255. Visited.insert(&I);
  256. NonVolatileLoads.push_back(&I);
  257. continue;
  258. } else if (isNonVolatileStore(&I)) {
  259. Visited.insert(&I);
  260. NonVolatileStores.push_back(&I);
  261. // All references from second operand of store (destination address)
  262. // can be considered write-only if they're not referenced by any
  263. // non-store instruction. References from first operand of store
  264. // (stored value) can't be treated either as read- or as write-only
  265. // so we add them to RefEdges as we do with all other instructions
  266. // except non-volatile load.
  267. Value *Stored = I.getOperand(0);
  268. if (auto *GV = dyn_cast<GlobalValue>(Stored))
  269. // findRefEdges will try to examine GV operands, so instead
  270. // of calling it we should add GV to RefEdges directly.
  271. RefEdges.insert(Index.getOrInsertValueInfo(GV));
  272. else if (auto *U = dyn_cast<User>(Stored))
  273. findRefEdges(Index, U, RefEdges, Visited);
  274. continue;
  275. }
  276. }
  277. findRefEdges(Index, &I, RefEdges, Visited);
  278. auto CS = ImmutableCallSite(&I);
  279. if (!CS)
  280. continue;
  281. const auto *CI = dyn_cast<CallInst>(&I);
  282. // Since we don't know exactly which local values are referenced in inline
  283. // assembly, conservatively mark the function as possibly referencing
  284. // a local value from inline assembly to ensure we don't export a
  285. // reference (which would require renaming and promotion of the
  286. // referenced value).
  287. if (HasLocalsInUsedOrAsm && CI && CI->isInlineAsm())
  288. HasInlineAsmMaybeReferencingInternal = true;
  289. auto *CalledValue = CS.getCalledValue();
  290. auto *CalledFunction = CS.getCalledFunction();
  291. if (CalledValue && !CalledFunction) {
  292. CalledValue = CalledValue->stripPointerCastsNoFollowAliases();
  293. // Stripping pointer casts can reveal a called function.
  294. CalledFunction = dyn_cast<Function>(CalledValue);
  295. }
  296. // Check if this is an alias to a function. If so, get the
  297. // called aliasee for the checks below.
  298. if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
  299. assert(!CalledFunction && "Expected null called function in callsite for alias");
  300. CalledFunction = dyn_cast<Function>(GA->getBaseObject());
  301. }
  302. // Check if this is a direct call to a known function or a known
  303. // intrinsic, or an indirect call with profile data.
  304. if (CalledFunction) {
  305. if (CI && CalledFunction->isIntrinsic()) {
  306. addIntrinsicToSummary(
  307. CI, TypeTests, TypeTestAssumeVCalls, TypeCheckedLoadVCalls,
  308. TypeTestAssumeConstVCalls, TypeCheckedLoadConstVCalls, DT);
  309. continue;
  310. }
  311. // We should have named any anonymous globals
  312. assert(CalledFunction->hasName());
  313. auto ScaledCount = PSI->getProfileCount(&I, BFI);
  314. auto Hotness = ScaledCount ? getHotness(ScaledCount.getValue(), PSI)
  315. : CalleeInfo::HotnessType::Unknown;
  316. if (ForceSummaryEdgesCold != FunctionSummary::FSHT_None)
  317. Hotness = CalleeInfo::HotnessType::Cold;
  318. // Use the original CalledValue, in case it was an alias. We want
  319. // to record the call edge to the alias in that case. Eventually
  320. // an alias summary will be created to associate the alias and
  321. // aliasee.
  322. auto &ValueInfo = CallGraphEdges[Index.getOrInsertValueInfo(
  323. cast<GlobalValue>(CalledValue))];
  324. ValueInfo.updateHotness(Hotness);
  325. // Add the relative block frequency to CalleeInfo if there is no profile
  326. // information.
  327. if (BFI != nullptr && Hotness == CalleeInfo::HotnessType::Unknown) {
  328. uint64_t BBFreq = BFI->getBlockFreq(&BB).getFrequency();
  329. uint64_t EntryFreq = BFI->getEntryFreq();
  330. ValueInfo.updateRelBlockFreq(BBFreq, EntryFreq);
  331. }
  332. } else {
  333. // Skip inline assembly calls.
  334. if (CI && CI->isInlineAsm())
  335. continue;
  336. // Skip direct calls.
  337. if (!CalledValue || isa<Constant>(CalledValue))
  338. continue;
  339. // Check if the instruction has a callees metadata. If so, add callees
  340. // to CallGraphEdges to reflect the references from the metadata, and
  341. // to enable importing for subsequent indirect call promotion and
  342. // inlining.
  343. if (auto *MD = I.getMetadata(LLVMContext::MD_callees)) {
  344. for (auto &Op : MD->operands()) {
  345. Function *Callee = mdconst::extract_or_null<Function>(Op);
  346. if (Callee)
  347. CallGraphEdges[Index.getOrInsertValueInfo(Callee)];
  348. }
  349. }
  350. uint32_t NumVals, NumCandidates;
  351. uint64_t TotalCount;
  352. auto CandidateProfileData =
  353. ICallAnalysis.getPromotionCandidatesForInstruction(
  354. &I, NumVals, TotalCount, NumCandidates);
  355. for (auto &Candidate : CandidateProfileData)
  356. CallGraphEdges[Index.getOrInsertValueInfo(Candidate.Value)]
  357. .updateHotness(getHotness(Candidate.Count, PSI));
  358. }
  359. }
  360. std::vector<ValueInfo> Refs;
  361. if (IsThinLTO) {
  362. auto AddRefEdges = [&](const std::vector<const Instruction *> &Instrs,
  363. SetVector<ValueInfo> &Edges,
  364. SmallPtrSet<const User *, 8> &Cache) {
  365. for (const auto *I : Instrs) {
  366. Cache.erase(I);
  367. findRefEdges(Index, I, Edges, Cache);
  368. }
  369. };
  370. // By now we processed all instructions in a function, except
  371. // non-volatile loads and non-volatile value stores. Let's find
  372. // ref edges for both of instruction sets
  373. AddRefEdges(NonVolatileLoads, LoadRefEdges, Visited);
  374. // We can add some values to the Visited set when processing load
  375. // instructions which are also used by stores in NonVolatileStores.
  376. // For example this can happen if we have following code:
  377. //
  378. // store %Derived* @foo, %Derived** bitcast (%Base** @bar to %Derived**)
  379. // %42 = load %Derived*, %Derived** bitcast (%Base** @bar to %Derived**)
  380. //
  381. // After processing loads we'll add bitcast to the Visited set, and if
  382. // we use the same set while processing stores, we'll never see store
  383. // to @bar and @bar will be mistakenly treated as readonly.
  384. SmallPtrSet<const llvm::User *, 8> StoreCache;
  385. AddRefEdges(NonVolatileStores, StoreRefEdges, StoreCache);
  386. // If both load and store instruction reference the same variable
  387. // we won't be able to optimize it. Add all such reference edges
  388. // to RefEdges set.
  389. for (auto &VI : StoreRefEdges)
  390. if (LoadRefEdges.remove(VI))
  391. RefEdges.insert(VI);
  392. unsigned RefCnt = RefEdges.size();
  393. // All new reference edges inserted in two loops below are either
  394. // read or write only. They will be grouped in the end of RefEdges
  395. // vector, so we can use a single integer value to identify them.
  396. for (auto &VI : LoadRefEdges)
  397. RefEdges.insert(VI);
  398. unsigned FirstWORef = RefEdges.size();
  399. for (auto &VI : StoreRefEdges)
  400. RefEdges.insert(VI);
  401. Refs = RefEdges.takeVector();
  402. for (; RefCnt < FirstWORef; ++RefCnt)
  403. Refs[RefCnt].setReadOnly();
  404. for (; RefCnt < Refs.size(); ++RefCnt)
  405. Refs[RefCnt].setWriteOnly();
  406. } else {
  407. Refs = RefEdges.takeVector();
  408. }
  409. // Explicit add hot edges to enforce importing for designated GUIDs for
  410. // sample PGO, to enable the same inlines as the profiled optimized binary.
  411. for (auto &I : F.getImportGUIDs())
  412. CallGraphEdges[Index.getOrInsertValueInfo(I)].updateHotness(
  413. ForceSummaryEdgesCold == FunctionSummary::FSHT_All
  414. ? CalleeInfo::HotnessType::Cold
  415. : CalleeInfo::HotnessType::Critical);
  416. bool NonRenamableLocal = isNonRenamableLocal(F);
  417. bool NotEligibleForImport =
  418. NonRenamableLocal || HasInlineAsmMaybeReferencingInternal;
  419. GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport,
  420. /* Live = */ false, F.isDSOLocal(),
  421. F.hasLinkOnceODRLinkage() && F.hasGlobalUnnamedAddr());
  422. FunctionSummary::FFlags FunFlags{
  423. F.hasFnAttribute(Attribute::ReadNone),
  424. F.hasFnAttribute(Attribute::ReadOnly),
  425. F.hasFnAttribute(Attribute::NoRecurse), F.returnDoesNotAlias(),
  426. // FIXME: refactor this to use the same code that inliner is using.
  427. // Don't try to import functions with noinline attribute.
  428. F.getAttributes().hasFnAttribute(Attribute::NoInline)};
  429. auto FuncSummary = llvm::make_unique<FunctionSummary>(
  430. Flags, NumInsts, FunFlags, /*EntryCount=*/0, std::move(Refs),
  431. CallGraphEdges.takeVector(), TypeTests.takeVector(),
  432. TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(),
  433. TypeTestAssumeConstVCalls.takeVector(),
  434. TypeCheckedLoadConstVCalls.takeVector());
  435. if (NonRenamableLocal)
  436. CantBePromoted.insert(F.getGUID());
  437. Index.addGlobalValueSummary(F, std::move(FuncSummary));
  438. }
/// Find function pointers referenced within the given vtable initializer
/// (or subset of an initializer) \p I. The starting offset of \p I within
/// the vtable initializer is \p StartingOffset. Any discovered function
/// pointers are added to \p VTableFuncs along with their cumulative offset
/// within the initializer.
static void findFuncPointers(const Constant *I, uint64_t StartingOffset,
                             const Module &M, ModuleSummaryIndex &Index,
                             VTableFuncList &VTableFuncs) {
  // First check if this is a function pointer.
  if (I->getType()->isPointerTy()) {
    auto Fn = dyn_cast<Function>(I->stripPointerCasts());
    // We can disregard __cxa_pure_virtual as a possible call target, as
    // calls to pure virtuals are UB.
    if (Fn && Fn->getName() != "__cxa_pure_virtual")
      VTableFuncs.push_back({Index.getOrInsertValueInfo(Fn), StartingOffset});
    return;
  }

  // Walk through the elements in the constant struct or array and recursively
  // look for virtual function pointers.
  const DataLayout &DL = M.getDataLayout();
  if (auto *C = dyn_cast<ConstantStruct>(I)) {
    StructType *STy = dyn_cast<StructType>(C->getType());
    assert(STy);
    const StructLayout *SL = DL.getStructLayout(C->getType());

    for (StructType::element_iterator EB = STy->element_begin(), EI = EB,
                                      EE = STy->element_end();
         EI != EE; ++EI) {
      // Compute the element's byte offset from the struct layout, then
      // recurse with that offset folded into the running StartingOffset.
      auto Offset = SL->getElementOffset(EI - EB);
      unsigned Op = SL->getElementContainingOffset(Offset);
      findFuncPointers(cast<Constant>(I->getOperand(Op)),
                       StartingOffset + Offset, M, Index, VTableFuncs);
    }
  } else if (auto *C = dyn_cast<ConstantArray>(I)) {
    ArrayType *ATy = C->getType();
    Type *EltTy = ATy->getElementType();
    uint64_t EltSize = DL.getTypeAllocSize(EltTy);
    // Array elements are laid out contiguously at multiples of the element
    // alloc size.
    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
      findFuncPointers(cast<Constant>(I->getOperand(i)),
                       StartingOffset + i * EltSize, M, Index, VTableFuncs);
    }
  }
  // Other constant kinds (e.g. zeroinitializer) contain no function pointers
  // and are ignored.
}
// Identify the function pointers referenced by vtable definition \p V.
// Non-constant globals are skipped: their contents may change at run time,
// so no stable set of call targets can be summarized.
static void computeVTableFuncs(ModuleSummaryIndex &Index,
                               const GlobalVariable &V, const Module &M,
                               VTableFuncList &VTableFuncs) {
  if (!V.isConstant())
    return;

  findFuncPointers(V.getInitializer(), /*StartingOffset=*/0, M, Index,
                   VTableFuncs);

#ifndef NDEBUG
  // Validate that the VTableFuncs list is ordered by offset.
  uint64_t PrevOffset = 0;
  for (auto &P : VTableFuncs) {
    // The findVFuncPointers traversal should have encountered the
    // functions in offset order. We need to use ">=" since PrevOffset
    // starts at 0.
    assert(P.VTableOffset >= PrevOffset);
    PrevOffset = P.VTableOffset;
  }
#endif
}
  501. /// Record vtable definition \p V for each type metadata it references.
  502. static void
  503. recordTypeIdCompatibleVtableReferences(ModuleSummaryIndex &Index,
  504. const GlobalVariable &V,
  505. SmallVectorImpl<MDNode *> &Types) {
  506. for (MDNode *Type : Types) {
  507. auto TypeID = Type->getOperand(1).get();
  508. uint64_t Offset =
  509. cast<ConstantInt>(
  510. cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
  511. ->getZExtValue();
  512. if (auto *TypeId = dyn_cast<MDString>(TypeID))
  513. Index.getOrInsertTypeIdCompatibleVtableSummary(TypeId->getString())
  514. .push_back({Offset, Index.getOrInsertValueInfo(&V)});
  515. }
  516. }
/// Compute the summary for global variable \p V and add it to \p Index.
///
/// Collects \p V's reference edges and GV flags, and — when the LTO unit is
/// not being split — the vtable call targets and type-id mappings needed for
/// index-based whole program devirtualization. \p Types is a scratch vector
/// reused across calls (it is cleared before use).
static void computeVariableSummary(ModuleSummaryIndex &Index,
                                   const GlobalVariable &V,
                                   DenseSet<GlobalValue::GUID> &CantBePromoted,
                                   const Module &M,
                                   SmallVectorImpl<MDNode *> &Types) {
  SetVector<ValueInfo> RefEdges;
  SmallPtrSet<const User *, 8> Visited;
  // Walk the initializer; a blockaddress reference makes the variable
  // ineligible for import (see findRefEdges).
  bool HasBlockAddress = findRefEdges(Index, &V, RefEdges, Visited);
  bool NonRenamableLocal = isNonRenamableLocal(V);
  GlobalValueSummary::GVFlags Flags(V.getLinkage(), NonRenamableLocal,
                                    /* Live = */ false, V.isDSOLocal(),
                                    V.hasLinkOnceODRLinkage() &&
                                        V.hasGlobalUnnamedAddr());

  VTableFuncList VTableFuncs;
  // If splitting is not enabled, then we compute the summary information
  // necessary for index-based whole program devirtualization.
  if (!Index.enableSplitLTOUnit()) {
    Types.clear();
    V.getMetadata(LLVMContext::MD_type, Types);
    if (!Types.empty()) {
      // Identify the function pointers referenced by this vtable definition.
      computeVTableFuncs(Index, V, M, VTableFuncs);

      // Record this vtable definition for each type metadata it references.
      recordTypeIdCompatibleVtableReferences(Index, V, Types);
    }
  }

  // Don't mark variables we won't be able to internalize as read/write-only.
  bool CanBeInternalized =
      !V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() &&
      !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass();
  GlobalVarSummary::GVarFlags VarFlags(CanBeInternalized, CanBeInternalized);
  auto GVarSummary = llvm::make_unique<GlobalVarSummary>(
      Flags, VarFlags, RefEdges.takeVector());
  if (NonRenamableLocal)
    CantBePromoted.insert(V.getGUID());
  if (HasBlockAddress)
    GVarSummary->setNotEligibleToImport();
  if (!VTableFuncs.empty())
    GVarSummary->setVTableFuncs(VTableFuncs);
  Index.addGlobalValueSummary(V, std::move(GVarSummary));
}
/// Compute the summary for alias \p A and add it to \p Index.
///
/// Precondition: the aliasee's own summary has already been added to the
/// per-module index (asserted below), since the alias summary links to it.
static void
computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
                    DenseSet<GlobalValue::GUID> &CantBePromoted) {
  bool NonRenamableLocal = isNonRenamableLocal(A);
  GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal,
                                    /* Live = */ false, A.isDSOLocal(),
                                    A.hasLinkOnceODRLinkage() &&
                                        A.hasGlobalUnnamedAddr());
  auto AS = llvm::make_unique<AliasSummary>(Flags);
  auto *Aliasee = A.getBaseObject();
  auto AliaseeVI = Index.getValueInfo(Aliasee->getGUID());
  assert(AliaseeVI && "Alias expects aliasee summary to be available");
  assert(AliaseeVI.getSummaryList().size() == 1 &&
         "Expected a single entry per aliasee in per-module index");
  AS->setAliasee(AliaseeVI, AliaseeVI.getSummaryList()[0].get());
  if (NonRenamableLocal)
    CantBePromoted.insert(A.getGUID());
  Index.addGlobalValueSummary(A, std::move(AS));
}
  575. // Set LiveRoot flag on entries matching the given value name.
  576. static void setLiveRoot(ModuleSummaryIndex &Index, StringRef Name) {
  577. if (ValueInfo VI = Index.getValueInfo(GlobalValue::getGUID(Name)))
  578. for (auto &Summary : VI.getSummaryList())
  579. Summary->setLive(true);
  580. }
/// Build a per-module summary index covering every function, global variable
/// and alias defined in \p M, for use by ThinLTO/regular-LTO whole-program
/// analysis.
///
/// \param M              Module to summarize.
/// \param GetBFICallback Optional callback returning BlockFrequencyInfo for a
///                       function (e.g. cached analysis results); when null,
///                       BFI is computed locally for functions with profile
///                       data.
/// \param PSI            Profile summary info for the module; must be
///                       non-null.
/// \returns the populated ModuleSummaryIndex.
ModuleSummaryIndex llvm::buildModuleSummaryIndex(
    const Module &M,
    std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback,
    ProfileSummaryInfo *PSI) {
  assert(PSI);
  // Propagate the module flag recording whether this LTO unit may be split
  // into the index, so the link can act on it.
  bool EnableSplitLTOUnit = false;
  if (auto *MD = mdconst::extract_or_null<ConstantInt>(
          M.getModuleFlag("EnableSplitLTOUnit")))
    EnableSplitLTOUnit = MD->getZExtValue();
  ModuleSummaryIndex Index(/*HaveGVs=*/true, EnableSplitLTOUnit);

  // Identify the local values in the llvm.used and llvm.compiler.used sets,
  // which should not be exported as they would then require renaming and
  // promotion, but we may have opaque uses e.g. in inline asm. We collect them
  // here because we use this information to mark functions containing inline
  // assembly calls as not importable.
  SmallPtrSet<GlobalValue *, 8> LocalsUsed;
  SmallPtrSet<GlobalValue *, 8> Used;
  // First collect those in the llvm.used set.
  collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false);
  // Next collect those in the llvm.compiler.used set.
  collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ true);
  // GUIDs of local values that must not be promoted to external linkage;
  // consulted below (and mutated by the module-asm walk) when deciding
  // import eligibility.
  DenseSet<GlobalValue::GUID> CantBePromoted;
  for (auto *V : Used) {
    if (V->hasLocalLinkage()) {
      LocalsUsed.insert(V);
      CantBePromoted.insert(V->getGUID());
    }
  }

  bool HasLocalInlineAsmSymbol = false;
  if (!M.getModuleInlineAsm().empty()) {
    // Collect the local values defined by module level asm, and set up
    // summaries for these symbols so that they can be marked as NoRename,
    // to prevent export of any use of them in regular IR that would require
    // renaming within the module level asm. Note we don't need to create a
    // summary for weak or global defs, as they don't need to be flagged as
    // NoRename, and defs in module level asm can't be imported anyway.
    // Also, any values used but not defined within module level asm should
    // be listed on the llvm.used or llvm.compiler.used global and marked as
    // referenced from there.
    ModuleSymbolTable::CollectAsmSymbols(
        M, [&](StringRef Name, object::BasicSymbolRef::Flags Flags) {
          // Symbols not marked as Weak or Global are local definitions.
          if (Flags & (object::BasicSymbolRef::SF_Weak |
                       object::BasicSymbolRef::SF_Global))
            return;
          HasLocalInlineAsmSymbol = true;
          GlobalValue *GV = M.getNamedValue(Name);
          if (!GV)
            return;
          assert(GV->isDeclaration() && "Def in module asm already has definition");
          // Synthesize conservative flags: internal, not importable, live.
          GlobalValueSummary::GVFlags GVFlags(GlobalValue::InternalLinkage,
                                              /* NotEligibleToImport = */ true,
                                              /* Live = */ true,
                                              /* Local */ GV->isDSOLocal(),
              GV->hasLinkOnceODRLinkage() && GV->hasGlobalUnnamedAddr());
          CantBePromoted.insert(GV->getGUID());
          // Create the appropriate summary type.
          if (Function *F = dyn_cast<Function>(GV)) {
            // Function defined in asm: empty summary (no instructions, calls
            // or refs visible at the IR level), flags taken from attributes.
            std::unique_ptr<FunctionSummary> Summary =
                llvm::make_unique<FunctionSummary>(
                    GVFlags, /*InstCount=*/0,
                    FunctionSummary::FFlags{
                        F->hasFnAttribute(Attribute::ReadNone),
                        F->hasFnAttribute(Attribute::ReadOnly),
                        F->hasFnAttribute(Attribute::NoRecurse),
                        F->returnDoesNotAlias(),
                        /* NoInline = */ false},
                    /*EntryCount=*/0, ArrayRef<ValueInfo>{},
                    ArrayRef<FunctionSummary::EdgeTy>{},
                    ArrayRef<GlobalValue::GUID>{},
                    ArrayRef<FunctionSummary::VFuncId>{},
                    ArrayRef<FunctionSummary::VFuncId>{},
                    ArrayRef<FunctionSummary::ConstVCall>{},
                    ArrayRef<FunctionSummary::ConstVCall>{});
            Index.addGlobalValueSummary(*GV, std::move(Summary));
          } else {
            // Global variable defined in asm: empty summary with no refs.
            std::unique_ptr<GlobalVarSummary> Summary =
                llvm::make_unique<GlobalVarSummary>(
                    GVFlags, GlobalVarSummary::GVarFlags(false, false),
                    ArrayRef<ValueInfo>{});
            Index.addGlobalValueSummary(*GV, std::move(Summary));
          }
        });
  }

  // Default to ThinLTO; the "ThinLTO" module flag (when present and zero)
  // indicates a regular-LTO build, in which case all summaries are marked
  // not-eligible-to-import below.
  bool IsThinLTO = true;
  if (auto *MD =
          mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("ThinLTO")))
    IsThinLTO = MD->getZExtValue();

  // Compute summaries for all functions defined in module, and save in the
  // index.
  for (auto &F : M) {
    if (F.isDeclaration())
      continue;

    DominatorTree DT(const_cast<Function &>(F));
    BlockFrequencyInfo *BFI = nullptr;
    std::unique_ptr<BlockFrequencyInfo> BFIPtr;
    if (GetBFICallback)
      BFI = GetBFICallback(F);
    else if (F.hasProfileData()) {
      // No caller-supplied BFI: compute it locally, but only when the
      // function has profile data that makes block frequencies meaningful.
      LoopInfo LI{DT};
      BranchProbabilityInfo BPI{F, LI};
      BFIPtr = llvm::make_unique<BlockFrequencyInfo>(F, BPI, LI);
      BFI = BFIPtr.get();
    }

    computeFunctionSummary(Index, M, F, BFI, PSI, DT,
                           !LocalsUsed.empty() || HasLocalInlineAsmSymbol,
                           CantBePromoted, IsThinLTO);
  }

  // Compute summaries for all variables defined in module, and save in the
  // index.
  SmallVector<MDNode *, 2> Types;
  for (const GlobalVariable &G : M.globals()) {
    if (G.isDeclaration())
      continue;
    computeVariableSummary(Index, G, CantBePromoted, M, Types);
  }

  // Compute summaries for all aliases defined in module, and save in the
  // index.
  for (const GlobalAlias &A : M.aliases())
    computeAliasSummary(Index, A, CantBePromoted);

  // Locals on llvm.used/llvm.compiler.used may have opaque uses (e.g. inline
  // asm), so their summaries must never be imported elsewhere.
  for (auto *V : LocalsUsed) {
    auto *Summary = Index.getGlobalValueSummary(*V);
    assert(Summary && "Missing summary for global value");
    Summary->setNotEligibleToImport();
  }

  // The linker doesn't know about these LLVM produced values, so we need
  // to flag them as live in the index to ensure index-based dead value
  // analysis treats them as live roots of the analysis.
  setLiveRoot(Index, "llvm.used");
  setLiveRoot(Index, "llvm.compiler.used");
  setLiveRoot(Index, "llvm.global_ctors");
  setLiveRoot(Index, "llvm.global_dtors");
  setLiveRoot(Index, "llvm.global.annotations");

  // Final pass: decide import eligibility for every summary in the index.
  for (auto &GlobalList : Index) {
    // Ignore entries for references that are undefined in the current module.
    if (GlobalList.second.SummaryList.empty())
      continue;

    assert(GlobalList.second.SummaryList.size() == 1 &&
           "Expected module's index to have one summary per GUID");
    auto &Summary = GlobalList.second.SummaryList[0];
    if (!IsThinLTO) {
      // Regular LTO: nothing is cross-module imported.
      Summary->setNotEligibleToImport();
      continue;
    }

    // A summary is only importable if every value it references could be
    // renamed/promoted in its home module.
    bool AllRefsCanBeExternallyReferenced =
        llvm::all_of(Summary->refs(), [&](const ValueInfo &VI) {
          return !CantBePromoted.count(VI.getGUID());
        });
    if (!AllRefsCanBeExternallyReferenced) {
      Summary->setNotEligibleToImport();
      continue;
    }

    // For functions, the same must hold for every direct callee.
    if (auto *FuncSummary = dyn_cast<FunctionSummary>(Summary.get())) {
      bool AllCallsCanBeExternallyReferenced = llvm::all_of(
          FuncSummary->calls(), [&](const FunctionSummary::EdgeTy &Edge) {
            return !CantBePromoted.count(Edge.first.getGUID());
          });
      if (!AllCallsCanBeExternallyReferenced)
        Summary->setNotEligibleToImport();
    }
  }

  // Optionally dump the index as a GraphViz dot file for debugging
  // (controlled by the ModuleSummaryDotFile cl::opt).
  if (!ModuleSummaryDotFile.empty()) {
    std::error_code EC;
    raw_fd_ostream OSDot(ModuleSummaryDotFile, EC, sys::fs::OpenFlags::OF_None);
    if (EC)
      report_fatal_error(Twine("Failed to open dot file ") +
                         ModuleSummaryDotFile + ": " + EC.message() + "\n");
    Index.exportToDot(OSDot);
  }

  return Index;
}
// Unique key identifying this analysis to the new pass manager's result cache.
AnalysisKey ModuleSummaryIndexAnalysis::Key;
  753. ModuleSummaryIndex
  754. ModuleSummaryIndexAnalysis::run(Module &M, ModuleAnalysisManager &AM) {
  755. ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
  756. auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  757. return buildModuleSummaryIndex(
  758. M,
  759. [&FAM](const Function &F) {
  760. return &FAM.getResult<BlockFrequencyAnalysis>(
  761. *const_cast<Function *>(&F));
  762. },
  763. &PSI);
  764. }
// Legacy pass manager identifier; its address (not value) identifies the pass.
char ModuleSummaryIndexWrapperPass::ID = 0;

// Register the legacy wrapper pass and its analysis dependencies with the
// pass registry (cfg-only = false, is-analysis = true).
INITIALIZE_PASS_BEGIN(ModuleSummaryIndexWrapperPass, "module-summary-analysis",
                      "Module Summary Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(ModuleSummaryIndexWrapperPass, "module-summary-analysis",
                    "Module Summary Analysis", false, true)
  772. ModulePass *llvm::createModuleSummaryIndexWrapperPass() {
  773. return new ModuleSummaryIndexWrapperPass();
  774. }
// Constructor: registers the pass with the global registry on first use, as
// required by the legacy pass manager's initialization protocol.
ModuleSummaryIndexWrapperPass::ModuleSummaryIndexWrapperPass()
    : ModulePass(ID) {
  initializeModuleSummaryIndexWrapperPassPass(*PassRegistry::getPassRegistry());
}
  779. bool ModuleSummaryIndexWrapperPass::runOnModule(Module &M) {
  780. auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  781. Index.emplace(buildModuleSummaryIndex(
  782. M,
  783. [this](const Function &F) {
  784. return &(this->getAnalysis<BlockFrequencyInfoWrapperPass>(
  785. *const_cast<Function *>(&F))
  786. .getBFI());
  787. },
  788. PSI));
  789. return false;
  790. }
// Release the cached index when the pass manager is done with the module.
// Returns false: the IR is not modified.
bool ModuleSummaryIndexWrapperPass::doFinalization(Module &M) {
  Index.reset();
  return false;
}
// Declare analysis dependencies: this pass only reads the IR (preserves all),
// and needs BFI and profile summary info available.
void ModuleSummaryIndexWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesAll();
  AU.addRequired<BlockFrequencyInfoWrapperPass>();
  AU.addRequired<ProfileSummaryInfoWrapperPass>();
}