// ModuleSummaryAnalysis.cpp
  1. //===- ModuleSummaryAnalysis.cpp - Module summary index builder -----------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This pass builds a ModuleSummaryIndex object for the module, to be written
  10. // to bitcode or LLVM assembly.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/Analysis/ModuleSummaryAnalysis.h"
  14. #include "llvm/ADT/ArrayRef.h"
  15. #include "llvm/ADT/DenseSet.h"
  16. #include "llvm/ADT/MapVector.h"
  17. #include "llvm/ADT/STLExtras.h"
  18. #include "llvm/ADT/SetVector.h"
  19. #include "llvm/ADT/SmallPtrSet.h"
  20. #include "llvm/ADT/SmallVector.h"
  21. #include "llvm/ADT/StringRef.h"
  22. #include "llvm/Analysis/BlockFrequencyInfo.h"
  23. #include "llvm/Analysis/BranchProbabilityInfo.h"
  24. #include "llvm/Analysis/IndirectCallPromotionAnalysis.h"
  25. #include "llvm/Analysis/LoopInfo.h"
  26. #include "llvm/Analysis/ProfileSummaryInfo.h"
  27. #include "llvm/Analysis/TypeMetadataUtils.h"
  28. #include "llvm/IR/Attributes.h"
  29. #include "llvm/IR/BasicBlock.h"
  30. #include "llvm/IR/CallSite.h"
  31. #include "llvm/IR/Constant.h"
  32. #include "llvm/IR/Constants.h"
  33. #include "llvm/IR/Dominators.h"
  34. #include "llvm/IR/Function.h"
  35. #include "llvm/IR/GlobalAlias.h"
  36. #include "llvm/IR/GlobalValue.h"
  37. #include "llvm/IR/GlobalVariable.h"
  38. #include "llvm/IR/Instructions.h"
  39. #include "llvm/IR/IntrinsicInst.h"
  40. #include "llvm/IR/Intrinsics.h"
  41. #include "llvm/IR/Metadata.h"
  42. #include "llvm/IR/Module.h"
  43. #include "llvm/IR/ModuleSummaryIndex.h"
  44. #include "llvm/IR/Use.h"
  45. #include "llvm/IR/User.h"
  46. #include "llvm/Object/ModuleSymbolTable.h"
  47. #include "llvm/Object/SymbolicFile.h"
  48. #include "llvm/Pass.h"
  49. #include "llvm/Support/Casting.h"
  50. #include "llvm/Support/CommandLine.h"
  51. #include <algorithm>
  52. #include <cassert>
  53. #include <cstdint>
  54. #include <vector>
  55. using namespace llvm;
  56. #define DEBUG_TYPE "module-summary-analysis"
// Option to force edges cold which will block importing when the
// -import-cold-multiplier is set to 0. Useful for debugging.
// This is the storage backing the -force-summary-edges-cold option below;
// it is wired in via cl::location so the pass reads a plain global.
FunctionSummary::ForceSummaryHotnessType ForceSummaryEdgesCold =
    FunctionSummary::FSHT_None;
cl::opt<FunctionSummary::ForceSummaryHotnessType, true> FSEC(
    "force-summary-edges-cold", cl::Hidden, cl::location(ForceSummaryEdgesCold),
    cl::desc("Force all edges in the function summary to cold"),
    cl::values(clEnumValN(FunctionSummary::FSHT_None, "none", "None."),
               clEnumValN(FunctionSummary::FSHT_AllNonCritical,
                          "all-non-critical", "All non-critical edges."),
               clEnumValN(FunctionSummary::FSHT_All, "all", "All edges.")));

// Optional file name to which a dot graph of the computed summary index is
// emitted; empty (the default) disables emission.
cl::opt<std::string> ModuleSummaryDotFile(
    "module-summary-dot-file", cl::init(""), cl::Hidden,
    cl::value_desc("filename"),
    cl::desc("File to emit dot graph of new summary into."));
  72. // Walk through the operands of a given User via worklist iteration and populate
  73. // the set of GlobalValue references encountered. Invoked either on an
  74. // Instruction or a GlobalVariable (which walks its initializer).
  75. // Return true if any of the operands contains blockaddress. This is important
  76. // to know when computing summary for global var, because if global variable
  77. // references basic block address we can't import it separately from function
  78. // containing that basic block. For simplicity we currently don't import such
  79. // global vars at all. When importing function we aren't interested if any
  80. // instruction in it takes an address of any basic block, because instruction
  81. // can only take an address of basic block located in the same function.
  82. static bool findRefEdges(ModuleSummaryIndex &Index, const User *CurUser,
  83. SetVector<ValueInfo> &RefEdges,
  84. SmallPtrSet<const User *, 8> &Visited) {
  85. bool HasBlockAddress = false;
  86. SmallVector<const User *, 32> Worklist;
  87. Worklist.push_back(CurUser);
  88. while (!Worklist.empty()) {
  89. const User *U = Worklist.pop_back_val();
  90. if (!Visited.insert(U).second)
  91. continue;
  92. ImmutableCallSite CS(U);
  93. for (const auto &OI : U->operands()) {
  94. const User *Operand = dyn_cast<User>(OI);
  95. if (!Operand)
  96. continue;
  97. if (isa<BlockAddress>(Operand)) {
  98. HasBlockAddress = true;
  99. continue;
  100. }
  101. if (auto *GV = dyn_cast<GlobalValue>(Operand)) {
  102. // We have a reference to a global value. This should be added to
  103. // the reference set unless it is a callee. Callees are handled
  104. // specially by WriteFunction and are added to a separate list.
  105. if (!(CS && CS.isCallee(&OI)))
  106. RefEdges.insert(Index.getOrInsertValueInfo(GV));
  107. continue;
  108. }
  109. Worklist.push_back(Operand);
  110. }
  111. }
  112. return HasBlockAddress;
  113. }
  114. static CalleeInfo::HotnessType getHotness(uint64_t ProfileCount,
  115. ProfileSummaryInfo *PSI) {
  116. if (!PSI)
  117. return CalleeInfo::HotnessType::Unknown;
  118. if (PSI->isHotCount(ProfileCount))
  119. return CalleeInfo::HotnessType::Hot;
  120. if (PSI->isColdCount(ProfileCount))
  121. return CalleeInfo::HotnessType::Cold;
  122. return CalleeInfo::HotnessType::None;
  123. }
  124. static bool isNonRenamableLocal(const GlobalValue &GV) {
  125. return GV.hasSection() && GV.hasLocalLinkage();
  126. }
  127. /// Determine whether this call has all constant integer arguments (excluding
  128. /// "this") and summarize it to VCalls or ConstVCalls as appropriate.
  129. static void addVCallToSet(DevirtCallSite Call, GlobalValue::GUID Guid,
  130. SetVector<FunctionSummary::VFuncId> &VCalls,
  131. SetVector<FunctionSummary::ConstVCall> &ConstVCalls) {
  132. std::vector<uint64_t> Args;
  133. // Start from the second argument to skip the "this" pointer.
  134. for (auto &Arg : make_range(Call.CS.arg_begin() + 1, Call.CS.arg_end())) {
  135. auto *CI = dyn_cast<ConstantInt>(Arg);
  136. if (!CI || CI->getBitWidth() > 64) {
  137. VCalls.insert({Guid, Call.Offset});
  138. return;
  139. }
  140. Args.push_back(CI->getZExtValue());
  141. }
  142. ConstVCalls.insert({{Guid, Call.Offset}, std::move(Args)});
  143. }
/// If this intrinsic call requires that we add information to the function
/// summary, do so via the non-constant reference arguments.
/// Handles llvm.type.test and llvm.type.checked.load; all other intrinsics
/// are ignored.
static void addIntrinsicToSummary(
    const CallInst *CI, SetVector<GlobalValue::GUID> &TypeTests,
    SetVector<FunctionSummary::VFuncId> &TypeTestAssumeVCalls,
    SetVector<FunctionSummary::VFuncId> &TypeCheckedLoadVCalls,
    SetVector<FunctionSummary::ConstVCall> &TypeTestAssumeConstVCalls,
    SetVector<FunctionSummary::ConstVCall> &TypeCheckedLoadConstVCalls,
    DominatorTree &DT) {
  switch (CI->getCalledFunction()->getIntrinsicID()) {
  case Intrinsic::type_test: {
    // Operand 1 of llvm.type.test is the type identifier metadata.
    auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(1));
    auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
    if (!TypeId)
      break;
    GlobalValue::GUID Guid = GlobalValue::getGUID(TypeId->getString());
    // Produce a summary from type.test intrinsics. We only summarize type.test
    // intrinsics that are used other than by an llvm.assume intrinsic.
    // Intrinsics that are assumed are relevant only to the devirtualization
    // pass, not the type test lowering pass.
    bool HasNonAssumeUses = llvm::any_of(CI->uses(), [](const Use &CIU) {
      auto *AssumeCI = dyn_cast<CallInst>(CIU.getUser());
      if (!AssumeCI)
        return true;
      Function *F = AssumeCI->getCalledFunction();
      return !F || F->getIntrinsicID() != Intrinsic::assume;
    });
    if (HasNonAssumeUses)
      TypeTests.insert(Guid);
    // Collect the devirtualizable call sites guarded by this type test so
    // they can be summarized as (const) virtual calls.
    SmallVector<DevirtCallSite, 4> DevirtCalls;
    SmallVector<CallInst *, 4> Assumes;
    findDevirtualizableCallsForTypeTest(DevirtCalls, Assumes, CI, DT);
    for (auto &Call : DevirtCalls)
      addVCallToSet(Call, Guid, TypeTestAssumeVCalls,
                    TypeTestAssumeConstVCalls);
    break;
  }
  case Intrinsic::type_checked_load: {
    // Operand 2 of llvm.type.checked.load is the type identifier metadata.
    auto *TypeMDVal = cast<MetadataAsValue>(CI->getArgOperand(2));
    auto *TypeId = dyn_cast<MDString>(TypeMDVal->getMetadata());
    if (!TypeId)
      break;
    GlobalValue::GUID Guid = GlobalValue::getGUID(TypeId->getString());
    SmallVector<DevirtCallSite, 4> DevirtCalls;
    SmallVector<Instruction *, 4> LoadedPtrs;
    SmallVector<Instruction *, 4> Preds;
    bool HasNonCallUses = false;
    findDevirtualizableCallsForTypeCheckedLoad(DevirtCalls, LoadedPtrs, Preds,
                                               HasNonCallUses, CI, DT);
    // Any non-call uses of the result of llvm.type.checked.load will
    // prevent us from optimizing away the llvm.type.test.
    if (HasNonCallUses)
      TypeTests.insert(Guid);
    for (auto &Call : DevirtCalls)
      addVCallToSet(Call, Guid, TypeCheckedLoadVCalls,
                    TypeCheckedLoadConstVCalls);
    break;
  }
  default:
    break;
  }
}
  206. static bool isNonVolatileLoad(const Instruction *I) {
  207. if (const auto *LI = dyn_cast<LoadInst>(I))
  208. return !LI->isVolatile();
  209. return false;
  210. }
  211. static void computeFunctionSummary(ModuleSummaryIndex &Index, const Module &M,
  212. const Function &F, BlockFrequencyInfo *BFI,
  213. ProfileSummaryInfo *PSI, DominatorTree &DT,
  214. bool HasLocalsInUsedOrAsm,
  215. DenseSet<GlobalValue::GUID> &CantBePromoted,
  216. bool IsThinLTO) {
  217. // Summary not currently supported for anonymous functions, they should
  218. // have been named.
  219. assert(F.hasName());
  220. unsigned NumInsts = 0;
  221. // Map from callee ValueId to profile count. Used to accumulate profile
  222. // counts for all static calls to a given callee.
  223. MapVector<ValueInfo, CalleeInfo> CallGraphEdges;
  224. SetVector<ValueInfo> RefEdges;
  225. SetVector<GlobalValue::GUID> TypeTests;
  226. SetVector<FunctionSummary::VFuncId> TypeTestAssumeVCalls,
  227. TypeCheckedLoadVCalls;
  228. SetVector<FunctionSummary::ConstVCall> TypeTestAssumeConstVCalls,
  229. TypeCheckedLoadConstVCalls;
  230. ICallPromotionAnalysis ICallAnalysis;
  231. SmallPtrSet<const User *, 8> Visited;
  232. // Add personality function, prefix data and prologue data to function's ref
  233. // list.
  234. findRefEdges(Index, &F, RefEdges, Visited);
  235. std::vector<const Instruction *> NonVolatileLoads;
  236. bool HasInlineAsmMaybeReferencingInternal = false;
  237. for (const BasicBlock &BB : F)
  238. for (const Instruction &I : BB) {
  239. if (isa<DbgInfoIntrinsic>(I))
  240. continue;
  241. ++NumInsts;
  242. if (isNonVolatileLoad(&I)) {
  243. // Postpone processing of non-volatile load instructions
  244. // See comments below
  245. Visited.insert(&I);
  246. NonVolatileLoads.push_back(&I);
  247. continue;
  248. }
  249. findRefEdges(Index, &I, RefEdges, Visited);
  250. auto CS = ImmutableCallSite(&I);
  251. if (!CS)
  252. continue;
  253. const auto *CI = dyn_cast<CallInst>(&I);
  254. // Since we don't know exactly which local values are referenced in inline
  255. // assembly, conservatively mark the function as possibly referencing
  256. // a local value from inline assembly to ensure we don't export a
  257. // reference (which would require renaming and promotion of the
  258. // referenced value).
  259. if (HasLocalsInUsedOrAsm && CI && CI->isInlineAsm())
  260. HasInlineAsmMaybeReferencingInternal = true;
  261. auto *CalledValue = CS.getCalledValue();
  262. auto *CalledFunction = CS.getCalledFunction();
  263. if (CalledValue && !CalledFunction) {
  264. CalledValue = CalledValue->stripPointerCastsNoFollowAliases();
  265. // Stripping pointer casts can reveal a called function.
  266. CalledFunction = dyn_cast<Function>(CalledValue);
  267. }
  268. // Check if this is an alias to a function. If so, get the
  269. // called aliasee for the checks below.
  270. if (auto *GA = dyn_cast<GlobalAlias>(CalledValue)) {
  271. assert(!CalledFunction && "Expected null called function in callsite for alias");
  272. CalledFunction = dyn_cast<Function>(GA->getBaseObject());
  273. }
  274. // Check if this is a direct call to a known function or a known
  275. // intrinsic, or an indirect call with profile data.
  276. if (CalledFunction) {
  277. if (CI && CalledFunction->isIntrinsic()) {
  278. addIntrinsicToSummary(
  279. CI, TypeTests, TypeTestAssumeVCalls, TypeCheckedLoadVCalls,
  280. TypeTestAssumeConstVCalls, TypeCheckedLoadConstVCalls, DT);
  281. continue;
  282. }
  283. // We should have named any anonymous globals
  284. assert(CalledFunction->hasName());
  285. auto ScaledCount = PSI->getProfileCount(&I, BFI);
  286. auto Hotness = ScaledCount ? getHotness(ScaledCount.getValue(), PSI)
  287. : CalleeInfo::HotnessType::Unknown;
  288. if (ForceSummaryEdgesCold != FunctionSummary::FSHT_None)
  289. Hotness = CalleeInfo::HotnessType::Cold;
  290. // Use the original CalledValue, in case it was an alias. We want
  291. // to record the call edge to the alias in that case. Eventually
  292. // an alias summary will be created to associate the alias and
  293. // aliasee.
  294. auto &ValueInfo = CallGraphEdges[Index.getOrInsertValueInfo(
  295. cast<GlobalValue>(CalledValue))];
  296. ValueInfo.updateHotness(Hotness);
  297. // Add the relative block frequency to CalleeInfo if there is no profile
  298. // information.
  299. if (BFI != nullptr && Hotness == CalleeInfo::HotnessType::Unknown) {
  300. uint64_t BBFreq = BFI->getBlockFreq(&BB).getFrequency();
  301. uint64_t EntryFreq = BFI->getEntryFreq();
  302. ValueInfo.updateRelBlockFreq(BBFreq, EntryFreq);
  303. }
  304. } else {
  305. // Skip inline assembly calls.
  306. if (CI && CI->isInlineAsm())
  307. continue;
  308. // Skip direct calls.
  309. if (!CalledValue || isa<Constant>(CalledValue))
  310. continue;
  311. // Check if the instruction has a callees metadata. If so, add callees
  312. // to CallGraphEdges to reflect the references from the metadata, and
  313. // to enable importing for subsequent indirect call promotion and
  314. // inlining.
  315. if (auto *MD = I.getMetadata(LLVMContext::MD_callees)) {
  316. for (auto &Op : MD->operands()) {
  317. Function *Callee = mdconst::extract_or_null<Function>(Op);
  318. if (Callee)
  319. CallGraphEdges[Index.getOrInsertValueInfo(Callee)];
  320. }
  321. }
  322. uint32_t NumVals, NumCandidates;
  323. uint64_t TotalCount;
  324. auto CandidateProfileData =
  325. ICallAnalysis.getPromotionCandidatesForInstruction(
  326. &I, NumVals, TotalCount, NumCandidates);
  327. for (auto &Candidate : CandidateProfileData)
  328. CallGraphEdges[Index.getOrInsertValueInfo(Candidate.Value)]
  329. .updateHotness(getHotness(Candidate.Count, PSI));
  330. }
  331. }
  332. // By now we processed all instructions in a function, except
  333. // non-volatile loads. All new refs we add in a loop below
  334. // are obviously constant. All constant refs are grouped in the
  335. // end of RefEdges vector, so we can use a single integer value
  336. // to identify them.
  337. unsigned RefCnt = RefEdges.size();
  338. for (const Instruction *I : NonVolatileLoads) {
  339. Visited.erase(I);
  340. findRefEdges(Index, I, RefEdges, Visited);
  341. }
  342. std::vector<ValueInfo> Refs = RefEdges.takeVector();
  343. // Regular LTO module doesn't participate in ThinLTO import,
  344. // so no reference from it can be readonly, since this would
  345. // require importing variable as local copy
  346. if (IsThinLTO)
  347. for (; RefCnt < Refs.size(); ++RefCnt)
  348. Refs[RefCnt].setReadOnly();
  349. // Explicit add hot edges to enforce importing for designated GUIDs for
  350. // sample PGO, to enable the same inlines as the profiled optimized binary.
  351. for (auto &I : F.getImportGUIDs())
  352. CallGraphEdges[Index.getOrInsertValueInfo(I)].updateHotness(
  353. ForceSummaryEdgesCold == FunctionSummary::FSHT_All
  354. ? CalleeInfo::HotnessType::Cold
  355. : CalleeInfo::HotnessType::Critical);
  356. bool NonRenamableLocal = isNonRenamableLocal(F);
  357. bool NotEligibleForImport =
  358. NonRenamableLocal || HasInlineAsmMaybeReferencingInternal;
  359. GlobalValueSummary::GVFlags Flags(F.getLinkage(), NotEligibleForImport,
  360. /* Live = */ false, F.isDSOLocal(),
  361. F.hasLinkOnceODRLinkage() && F.hasGlobalUnnamedAddr());
  362. FunctionSummary::FFlags FunFlags{
  363. F.hasFnAttribute(Attribute::ReadNone),
  364. F.hasFnAttribute(Attribute::ReadOnly),
  365. F.hasFnAttribute(Attribute::NoRecurse), F.returnDoesNotAlias(),
  366. // FIXME: refactor this to use the same code that inliner is using.
  367. // Don't try to import functions with noinline attribute.
  368. F.getAttributes().hasFnAttribute(Attribute::NoInline)};
  369. auto FuncSummary = llvm::make_unique<FunctionSummary>(
  370. Flags, NumInsts, FunFlags, /*EntryCount=*/0, std::move(Refs),
  371. CallGraphEdges.takeVector(), TypeTests.takeVector(),
  372. TypeTestAssumeVCalls.takeVector(), TypeCheckedLoadVCalls.takeVector(),
  373. TypeTestAssumeConstVCalls.takeVector(),
  374. TypeCheckedLoadConstVCalls.takeVector());
  375. if (NonRenamableLocal)
  376. CantBePromoted.insert(F.getGUID());
  377. Index.addGlobalValueSummary(F, std::move(FuncSummary));
  378. }
/// Find function pointers referenced within the given vtable initializer
/// (or subset of an initializer) \p I. The starting offset of \p I within
/// the vtable initializer is \p StartingOffset. Any discovered function
/// pointers are added to \p VTableFuncs along with their cumulative offset
/// within the initializer.
static void findFuncPointers(const Constant *I, uint64_t StartingOffset,
                             const Module &M, ModuleSummaryIndex &Index,
                             VTableFuncList &VTableFuncs) {
  // First check if this is a function pointer.
  if (I->getType()->isPointerTy()) {
    auto Fn = dyn_cast<Function>(I->stripPointerCasts());
    // We can disregard __cxa_pure_virtual as a possible call target, as
    // calls to pure virtuals are UB.
    if (Fn && Fn->getName() != "__cxa_pure_virtual")
      VTableFuncs.push_back({Index.getOrInsertValueInfo(Fn), StartingOffset});
    return;
  }

  // Walk through the elements in the constant struct or array and recursively
  // look for virtual function pointers.
  const DataLayout &DL = M.getDataLayout();
  if (auto *C = dyn_cast<ConstantStruct>(I)) {
    StructType *STy = dyn_cast<StructType>(C->getType());
    assert(STy);
    // Use the struct layout to translate element indices into byte offsets
    // within the initializer.
    const StructLayout *SL = DL.getStructLayout(C->getType());
    for (StructType::element_iterator EB = STy->element_begin(), EI = EB,
                                      EE = STy->element_end();
         EI != EE; ++EI) {
      auto Offset = SL->getElementOffset(EI - EB);
      unsigned Op = SL->getElementContainingOffset(Offset);
      // Recurse with the cumulative offset of this element.
      findFuncPointers(cast<Constant>(I->getOperand(Op)),
                       StartingOffset + Offset, M, Index, VTableFuncs);
    }
  } else if (auto *C = dyn_cast<ConstantArray>(I)) {
    ArrayType *ATy = C->getType();
    Type *EltTy = ATy->getElementType();
    // Array elements are uniformly sized; offset of element i is i * size.
    uint64_t EltSize = DL.getTypeAllocSize(EltTy);
    for (unsigned i = 0, e = ATy->getNumElements(); i != e; ++i) {
      findFuncPointers(cast<Constant>(I->getOperand(i)),
                       StartingOffset + i * EltSize, M, Index, VTableFuncs);
    }
  }
}
  421. // Identify the function pointers referenced by vtable definition \p V.
  422. static void computeVTableFuncs(ModuleSummaryIndex &Index,
  423. const GlobalVariable &V, const Module &M,
  424. VTableFuncList &VTableFuncs) {
  425. if (!V.isConstant())
  426. return;
  427. findFuncPointers(V.getInitializer(), /*StartingOffset=*/0, M, Index,
  428. VTableFuncs);
  429. #ifndef NDEBUG
  430. // Validate that the VTableFuncs list is ordered by offset.
  431. uint64_t PrevOffset = 0;
  432. for (auto &P : VTableFuncs) {
  433. // The findVFuncPointers traversal should have encountered the
  434. // functions in offset order. We need to use ">=" since PrevOffset
  435. // starts at 0.
  436. assert(P.VTableOffset >= PrevOffset);
  437. PrevOffset = P.VTableOffset;
  438. }
  439. #endif
  440. }
  441. /// Record vtable definition \p V for each type metadata it references.
  442. static void
  443. recordTypeIdCompatibleVtableReferences(ModuleSummaryIndex &Index,
  444. const GlobalVariable &V,
  445. SmallVectorImpl<MDNode *> &Types) {
  446. for (MDNode *Type : Types) {
  447. auto TypeID = Type->getOperand(1).get();
  448. uint64_t Offset =
  449. cast<ConstantInt>(
  450. cast<ConstantAsMetadata>(Type->getOperand(0))->getValue())
  451. ->getZExtValue();
  452. if (auto *TypeId = dyn_cast<MDString>(TypeID))
  453. Index.getOrInsertTypeIdCompatibleVtableSummary(TypeId->getString())
  454. .push_back({Offset, Index.getOrInsertValueInfo(&V)});
  455. }
  456. }
/// Compute the per-module summary for global variable \p V and add it to
/// \p Index; non-renamable locals are recorded in \p CantBePromoted.
/// \p Types is a scratch buffer reused across calls for !type metadata.
static void computeVariableSummary(ModuleSummaryIndex &Index,
                                   const GlobalVariable &V,
                                   DenseSet<GlobalValue::GUID> &CantBePromoted,
                                   const Module &M,
                                   SmallVectorImpl<MDNode *> &Types) {
  SetVector<ValueInfo> RefEdges;
  SmallPtrSet<const User *, 8> Visited;
  // Walk the initializer; a blockaddress reference makes the variable
  // non-importable (it would tie the copy to a specific function body).
  bool HasBlockAddress = findRefEdges(Index, &V, RefEdges, Visited);
  bool NonRenamableLocal = isNonRenamableLocal(V);
  GlobalValueSummary::GVFlags Flags(
      V.getLinkage(), NonRenamableLocal,
      /* Live = */ false, V.isDSOLocal(),
      V.hasLinkOnceODRLinkage() && V.hasGlobalUnnamedAddr());

  VTableFuncList VTableFuncs;
  // If splitting is not enabled, then we compute the summary information
  // necessary for index-based whole program devirtualization.
  if (!Index.enableSplitLTOUnit()) {
    Types.clear();
    V.getMetadata(LLVMContext::MD_type, Types);
    if (!Types.empty()) {
      // Identify the function pointers referenced by this vtable definition.
      computeVTableFuncs(Index, V, M, VTableFuncs);
      // Record this vtable definition for each type metadata it references.
      recordTypeIdCompatibleVtableReferences(Index, V, Types);
    }
  }

  // Don't mark variables we won't be able to internalize as read-only.
  GlobalVarSummary::GVarFlags VarFlags(
      !V.hasComdat() && !V.hasAppendingLinkage() && !V.isInterposable() &&
      !V.hasAvailableExternallyLinkage() && !V.hasDLLExportStorageClass());
  auto GVarSummary = llvm::make_unique<GlobalVarSummary>(Flags, VarFlags,
                                                         RefEdges.takeVector());
  if (NonRenamableLocal)
    CantBePromoted.insert(V.getGUID());
  if (HasBlockAddress)
    GVarSummary->setNotEligibleToImport();
  if (!VTableFuncs.empty())
    GVarSummary->setVTableFuncs(VTableFuncs);
  Index.addGlobalValueSummary(V, std::move(GVarSummary));
}
  496. static void
  497. computeAliasSummary(ModuleSummaryIndex &Index, const GlobalAlias &A,
  498. DenseSet<GlobalValue::GUID> &CantBePromoted) {
  499. bool NonRenamableLocal = isNonRenamableLocal(A);
  500. GlobalValueSummary::GVFlags Flags(A.getLinkage(), NonRenamableLocal,
  501. /* Live = */ false, A.isDSOLocal(),
  502. A.hasLinkOnceODRLinkage() && A.hasGlobalUnnamedAddr());
  503. auto AS = llvm::make_unique<AliasSummary>(Flags);
  504. auto *Aliasee = A.getBaseObject();
  505. auto AliaseeVI = Index.getValueInfo(Aliasee->getGUID());
  506. assert(AliaseeVI && "Alias expects aliasee summary to be available");
  507. assert(AliaseeVI.getSummaryList().size() == 1 &&
  508. "Expected a single entry per aliasee in per-module index");
  509. AS->setAliasee(AliaseeVI, AliaseeVI.getSummaryList()[0].get());
  510. if (NonRenamableLocal)
  511. CantBePromoted.insert(A.getGUID());
  512. Index.addGlobalValueSummary(A, std::move(AS));
  513. }
  514. // Set LiveRoot flag on entries matching the given value name.
  515. static void setLiveRoot(ModuleSummaryIndex &Index, StringRef Name) {
  516. if (ValueInfo VI = Index.getValueInfo(GlobalValue::getGUID(Name)))
  517. for (auto &Summary : VI.getSummaryList())
  518. Summary->setLive(true);
  519. }
  520. ModuleSummaryIndex llvm::buildModuleSummaryIndex(
  521. const Module &M,
  522. std::function<BlockFrequencyInfo *(const Function &F)> GetBFICallback,
  523. ProfileSummaryInfo *PSI) {
  524. assert(PSI);
  525. bool EnableSplitLTOUnit = false;
  526. if (auto *MD = mdconst::extract_or_null<ConstantInt>(
  527. M.getModuleFlag("EnableSplitLTOUnit")))
  528. EnableSplitLTOUnit = MD->getZExtValue();
  529. ModuleSummaryIndex Index(/*HaveGVs=*/true, EnableSplitLTOUnit);
  530. // Identify the local values in the llvm.used and llvm.compiler.used sets,
  531. // which should not be exported as they would then require renaming and
  532. // promotion, but we may have opaque uses e.g. in inline asm. We collect them
  533. // here because we use this information to mark functions containing inline
  534. // assembly calls as not importable.
  535. SmallPtrSet<GlobalValue *, 8> LocalsUsed;
  536. SmallPtrSet<GlobalValue *, 8> Used;
  537. // First collect those in the llvm.used set.
  538. collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ false);
  539. // Next collect those in the llvm.compiler.used set.
  540. collectUsedGlobalVariables(M, Used, /*CompilerUsed*/ true);
  541. DenseSet<GlobalValue::GUID> CantBePromoted;
  542. for (auto *V : Used) {
  543. if (V->hasLocalLinkage()) {
  544. LocalsUsed.insert(V);
  545. CantBePromoted.insert(V->getGUID());
  546. }
  547. }
  548. bool HasLocalInlineAsmSymbol = false;
  549. if (!M.getModuleInlineAsm().empty()) {
  550. // Collect the local values defined by module level asm, and set up
  551. // summaries for these symbols so that they can be marked as NoRename,
  552. // to prevent export of any use of them in regular IR that would require
  553. // renaming within the module level asm. Note we don't need to create a
  554. // summary for weak or global defs, as they don't need to be flagged as
  555. // NoRename, and defs in module level asm can't be imported anyway.
  556. // Also, any values used but not defined within module level asm should
  557. // be listed on the llvm.used or llvm.compiler.used global and marked as
  558. // referenced from there.
  559. ModuleSymbolTable::CollectAsmSymbols(
  560. M, [&](StringRef Name, object::BasicSymbolRef::Flags Flags) {
  561. // Symbols not marked as Weak or Global are local definitions.
  562. if (Flags & (object::BasicSymbolRef::SF_Weak |
  563. object::BasicSymbolRef::SF_Global))
  564. return;
  565. HasLocalInlineAsmSymbol = true;
  566. GlobalValue *GV = M.getNamedValue(Name);
  567. if (!GV)
  568. return;
  569. assert(GV->isDeclaration() && "Def in module asm already has definition");
  570. GlobalValueSummary::GVFlags GVFlags(GlobalValue::InternalLinkage,
  571. /* NotEligibleToImport = */ true,
  572. /* Live = */ true,
  573. /* Local */ GV->isDSOLocal(),
  574. GV->hasLinkOnceODRLinkage() && GV->hasGlobalUnnamedAddr());
  575. CantBePromoted.insert(GV->getGUID());
  576. // Create the appropriate summary type.
  577. if (Function *F = dyn_cast<Function>(GV)) {
  578. std::unique_ptr<FunctionSummary> Summary =
  579. llvm::make_unique<FunctionSummary>(
  580. GVFlags, /*InstCount=*/0,
  581. FunctionSummary::FFlags{
  582. F->hasFnAttribute(Attribute::ReadNone),
  583. F->hasFnAttribute(Attribute::ReadOnly),
  584. F->hasFnAttribute(Attribute::NoRecurse),
  585. F->returnDoesNotAlias(),
  586. /* NoInline = */ false},
  587. /*EntryCount=*/0, ArrayRef<ValueInfo>{},
  588. ArrayRef<FunctionSummary::EdgeTy>{},
  589. ArrayRef<GlobalValue::GUID>{},
  590. ArrayRef<FunctionSummary::VFuncId>{},
  591. ArrayRef<FunctionSummary::VFuncId>{},
  592. ArrayRef<FunctionSummary::ConstVCall>{},
  593. ArrayRef<FunctionSummary::ConstVCall>{});
  594. Index.addGlobalValueSummary(*GV, std::move(Summary));
  595. } else {
  596. std::unique_ptr<GlobalVarSummary> Summary =
  597. llvm::make_unique<GlobalVarSummary>(
  598. GVFlags, GlobalVarSummary::GVarFlags(),
  599. ArrayRef<ValueInfo>{});
  600. Index.addGlobalValueSummary(*GV, std::move(Summary));
  601. }
  602. });
  603. }
  604. bool IsThinLTO = true;
  605. if (auto *MD =
  606. mdconst::extract_or_null<ConstantInt>(M.getModuleFlag("ThinLTO")))
  607. IsThinLTO = MD->getZExtValue();
  608. // Compute summaries for all functions defined in module, and save in the
  609. // index.
  610. for (auto &F : M) {
  611. if (F.isDeclaration())
  612. continue;
  613. DominatorTree DT(const_cast<Function &>(F));
  614. BlockFrequencyInfo *BFI = nullptr;
  615. std::unique_ptr<BlockFrequencyInfo> BFIPtr;
  616. if (GetBFICallback)
  617. BFI = GetBFICallback(F);
  618. else if (F.hasProfileData()) {
  619. LoopInfo LI{DT};
  620. BranchProbabilityInfo BPI{F, LI};
  621. BFIPtr = llvm::make_unique<BlockFrequencyInfo>(F, BPI, LI);
  622. BFI = BFIPtr.get();
  623. }
  624. computeFunctionSummary(Index, M, F, BFI, PSI, DT,
  625. !LocalsUsed.empty() || HasLocalInlineAsmSymbol,
  626. CantBePromoted, IsThinLTO);
  627. }
  628. // Compute summaries for all variables defined in module, and save in the
  629. // index.
  630. SmallVector<MDNode *, 2> Types;
  631. for (const GlobalVariable &G : M.globals()) {
  632. if (G.isDeclaration())
  633. continue;
  634. computeVariableSummary(Index, G, CantBePromoted, M, Types);
  635. }
  636. // Compute summaries for all aliases defined in module, and save in the
  637. // index.
  638. for (const GlobalAlias &A : M.aliases())
  639. computeAliasSummary(Index, A, CantBePromoted);
  640. for (auto *V : LocalsUsed) {
  641. auto *Summary = Index.getGlobalValueSummary(*V);
  642. assert(Summary && "Missing summary for global value");
  643. Summary->setNotEligibleToImport();
  644. }
  645. // The linker doesn't know about these LLVM produced values, so we need
  646. // to flag them as live in the index to ensure index-based dead value
  647. // analysis treats them as live roots of the analysis.
  648. setLiveRoot(Index, "llvm.used");
  649. setLiveRoot(Index, "llvm.compiler.used");
  650. setLiveRoot(Index, "llvm.global_ctors");
  651. setLiveRoot(Index, "llvm.global_dtors");
  652. setLiveRoot(Index, "llvm.global.annotations");
  653. for (auto &GlobalList : Index) {
  654. // Ignore entries for references that are undefined in the current module.
  655. if (GlobalList.second.SummaryList.empty())
  656. continue;
  657. assert(GlobalList.second.SummaryList.size() == 1 &&
  658. "Expected module's index to have one summary per GUID");
  659. auto &Summary = GlobalList.second.SummaryList[0];
  660. if (!IsThinLTO) {
  661. Summary->setNotEligibleToImport();
  662. continue;
  663. }
  664. bool AllRefsCanBeExternallyReferenced =
  665. llvm::all_of(Summary->refs(), [&](const ValueInfo &VI) {
  666. return !CantBePromoted.count(VI.getGUID());
  667. });
  668. if (!AllRefsCanBeExternallyReferenced) {
  669. Summary->setNotEligibleToImport();
  670. continue;
  671. }
  672. if (auto *FuncSummary = dyn_cast<FunctionSummary>(Summary.get())) {
  673. bool AllCallsCanBeExternallyReferenced = llvm::all_of(
  674. FuncSummary->calls(), [&](const FunctionSummary::EdgeTy &Edge) {
  675. return !CantBePromoted.count(Edge.first.getGUID());
  676. });
  677. if (!AllCallsCanBeExternallyReferenced)
  678. Summary->setNotEligibleToImport();
  679. }
  680. }
  681. if (!ModuleSummaryDotFile.empty()) {
  682. std::error_code EC;
  683. raw_fd_ostream OSDot(ModuleSummaryDotFile, EC, sys::fs::OpenFlags::F_None);
  684. if (EC)
  685. report_fatal_error(Twine("Failed to open dot file ") +
  686. ModuleSummaryDotFile + ": " + EC.message() + "\n");
  687. Index.exportToDot(OSDot);
  688. }
  689. return Index;
  690. }
// Unique key identifying ModuleSummaryIndexAnalysis to the new pass manager's
// analysis registry (required static member for every analysis).
AnalysisKey ModuleSummaryIndexAnalysis::Key;
  692. ModuleSummaryIndex
  693. ModuleSummaryIndexAnalysis::run(Module &M, ModuleAnalysisManager &AM) {
  694. ProfileSummaryInfo &PSI = AM.getResult<ProfileSummaryAnalysis>(M);
  695. auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  696. return buildModuleSummaryIndex(
  697. M,
  698. [&FAM](const Function &F) {
  699. return &FAM.getResult<BlockFrequencyAnalysis>(
  700. *const_cast<Function *>(&F));
  701. },
  702. &PSI);
  703. }
// Pass identification, replacement for typeid.
char ModuleSummaryIndexWrapperPass::ID = 0;

// Register the legacy-PM wrapper pass and its analysis dependencies:
// BlockFrequencyInfo (used when profile data is present) and
// ProfileSummaryInfo (whole-program profile summary).
INITIALIZE_PASS_BEGIN(ModuleSummaryIndexWrapperPass, "module-summary-analysis",
                      "Module Summary Analysis", false, true)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
INITIALIZE_PASS_END(ModuleSummaryIndexWrapperPass, "module-summary-analysis",
                    "Module Summary Analysis", false, true)
  711. ModulePass *llvm::createModuleSummaryIndexWrapperPass() {
  712. return new ModuleSummaryIndexWrapperPass();
  713. }
// Legacy-PM constructor; registers the pass with the global PassRegistry so
// the INITIALIZE_PASS bookkeeping above runs exactly once.
ModuleSummaryIndexWrapperPass::ModuleSummaryIndexWrapperPass()
    : ModulePass(ID) {
  initializeModuleSummaryIndexWrapperPassPass(*PassRegistry::getPassRegistry());
}
  718. bool ModuleSummaryIndexWrapperPass::runOnModule(Module &M) {
  719. auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
  720. Index.emplace(buildModuleSummaryIndex(
  721. M,
  722. [this](const Function &F) {
  723. return &(this->getAnalysis<BlockFrequencyInfoWrapperPass>(
  724. *const_cast<Function *>(&F))
  725. .getBFI());
  726. },
  727. PSI));
  728. return false;
  729. }
// Release the cached index once the pass pipeline is done with this module;
// the module itself is untouched (returns false).
bool ModuleSummaryIndexWrapperPass::doFinalization(Module &M) {
  Index.reset();
  return false;
}
  734. void ModuleSummaryIndexWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
  735. AU.setPreservesAll();
  736. AU.addRequired<BlockFrequencyInfoWrapperPass>();
  737. AU.addRequired<ProfileSummaryInfoWrapperPass>();
  738. }