PGOInstrumentation.cpp 67 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884
  1. //===- PGOInstrumentation.cpp - MST-based PGO Instrumentation -------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements PGO instrumentation using a minimum spanning tree based
  10. // on the following paper:
  11. // [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points
  12. // for program frequency counts. BIT Numerical Mathematics 1973, Volume 13,
  13. // Issue 3, pp 313-322
  14. // The idea of the algorithm based on the fact that for each node (except for
  15. // the entry and exit), the sum of incoming edge counts equals the sum of
  16. // outgoing edge counts. The count of edge on spanning tree can be derived from
  17. // those edges not on the spanning tree. Knuth proves this method instruments
  18. // the minimum number of edges.
  19. //
  20. // The minimal spanning tree here is actually a maximum weight tree -- on-tree
  21. // edges have higher frequencies (more likely to execute). The idea is to
  22. // instrument those less frequently executed edges to reduce the runtime
  23. // overhead of instrumented binaries.
  24. //
  25. // This file contains two passes:
  26. // (1) Pass PGOInstrumentationGen which instruments the IR to generate edge
  27. // count profile, and generates the instrumentation for indirect call
  28. // profiling.
  29. // (2) Pass PGOInstrumentationUse which reads the edge count profile and
  30. // annotates the branch weights. It also reads the indirect call value
  31. // profiling records and annotate the indirect call instructions.
  32. //
  33. // To get the precise counter information, These two passes need to invoke at
  34. // the same compilation point (so they see the same IR). For pass
  35. // PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For
  36. // pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and
  37. // the profile is opened in module level and passed to each PGOUseFunc instance.
  38. // The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put
  39. // in class FuncPGOInstrumentation.
  40. //
  41. // Class PGOEdge represents a CFG edge and some auxiliary information. Class
  42. // BBInfo contains auxiliary information for each BB. These two classes are used
  43. // in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived
  44. // class of PGOEdge and BBInfo, respectively. They contains extra data structure
  45. // used in populating profile counters.
  46. // The MST implementation is in Class CFGMST (CFGMST.h).
  47. //
  48. //===----------------------------------------------------------------------===//
  49. #include "CFGMST.h"
  50. #include "llvm/ADT/APInt.h"
  51. #include "llvm/ADT/ArrayRef.h"
  52. #include "llvm/ADT/STLExtras.h"
  53. #include "llvm/ADT/SmallVector.h"
  54. #include "llvm/ADT/Statistic.h"
  55. #include "llvm/ADT/StringRef.h"
  56. #include "llvm/ADT/Triple.h"
  57. #include "llvm/ADT/Twine.h"
  58. #include "llvm/ADT/iterator.h"
  59. #include "llvm/ADT/iterator_range.h"
  60. #include "llvm/Analysis/BlockFrequencyInfo.h"
  61. #include "llvm/Analysis/BranchProbabilityInfo.h"
  62. #include "llvm/Analysis/CFG.h"
  63. #include "llvm/Analysis/IndirectCallVisitor.h"
  64. #include "llvm/Analysis/LoopInfo.h"
  65. #include "llvm/Analysis/OptimizationRemarkEmitter.h"
  66. #include "llvm/Analysis/ProfileSummaryInfo.h"
  67. #include "llvm/IR/Attributes.h"
  68. #include "llvm/IR/BasicBlock.h"
  69. #include "llvm/IR/CFG.h"
  70. #include "llvm/IR/CallSite.h"
  71. #include "llvm/IR/Comdat.h"
  72. #include "llvm/IR/Constant.h"
  73. #include "llvm/IR/Constants.h"
  74. #include "llvm/IR/DiagnosticInfo.h"
  75. #include "llvm/IR/Dominators.h"
  76. #include "llvm/IR/Function.h"
  77. #include "llvm/IR/GlobalAlias.h"
  78. #include "llvm/IR/GlobalValue.h"
  79. #include "llvm/IR/GlobalVariable.h"
  80. #include "llvm/IR/IRBuilder.h"
  81. #include "llvm/IR/InstVisitor.h"
  82. #include "llvm/IR/InstrTypes.h"
  83. #include "llvm/IR/Instruction.h"
  84. #include "llvm/IR/Instructions.h"
  85. #include "llvm/IR/IntrinsicInst.h"
  86. #include "llvm/IR/Intrinsics.h"
  87. #include "llvm/IR/LLVMContext.h"
  88. #include "llvm/IR/MDBuilder.h"
  89. #include "llvm/IR/Module.h"
  90. #include "llvm/IR/PassManager.h"
  91. #include "llvm/IR/ProfileSummary.h"
  92. #include "llvm/IR/Type.h"
  93. #include "llvm/IR/Value.h"
  94. #include "llvm/Pass.h"
  95. #include "llvm/ProfileData/InstrProf.h"
  96. #include "llvm/ProfileData/InstrProfReader.h"
  97. #include "llvm/Support/BranchProbability.h"
  98. #include "llvm/Support/Casting.h"
  99. #include "llvm/Support/CommandLine.h"
  100. #include "llvm/Support/DOTGraphTraits.h"
  101. #include "llvm/Support/Debug.h"
  102. #include "llvm/Support/Error.h"
  103. #include "llvm/Support/ErrorHandling.h"
  104. #include "llvm/Support/GraphWriter.h"
  105. #include "llvm/Support/JamCRC.h"
  106. #include "llvm/Support/raw_ostream.h"
  107. #include "llvm/Transforms/Instrumentation.h"
  108. #include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
  109. #include "llvm/Transforms/Utils/BasicBlockUtils.h"
  110. #include "llvm/Transforms/Utils/MisExpect.h"
  111. #include <algorithm>
  112. #include <cassert>
  113. #include <cstdint>
  114. #include <memory>
  115. #include <numeric>
  116. #include <string>
  117. #include <unordered_map>
  118. #include <utility>
  119. #include <vector>
  120. using namespace llvm;
  121. using ProfileCount = Function::ProfileCount;
  122. #define DEBUG_TYPE "pgo-instrumentation"
  123. STATISTIC(NumOfPGOInstrument, "Number of edges instrumented.");
  124. STATISTIC(NumOfPGOSelectInsts, "Number of select instruction instrumented.");
  125. STATISTIC(NumOfPGOMemIntrinsics, "Number of mem intrinsics instrumented.");
  126. STATISTIC(NumOfPGOEdge, "Number of edges.");
  127. STATISTIC(NumOfPGOBB, "Number of basic-blocks.");
  128. STATISTIC(NumOfPGOSplit, "Number of critical edge splits.");
  129. STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts.");
  130. STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile.");
  131. STATISTIC(NumOfPGOMissing, "Number of functions without profile.");
  132. STATISTIC(NumOfPGOICall, "Number of indirect call value instrumentations.");
  133. STATISTIC(NumOfCSPGOInstrument, "Number of edges instrumented in CSPGO.");
  134. STATISTIC(NumOfCSPGOSelectInsts,
  135. "Number of select instruction instrumented in CSPGO.");
  136. STATISTIC(NumOfCSPGOMemIntrinsics,
  137. "Number of mem intrinsics instrumented in CSPGO.");
  138. STATISTIC(NumOfCSPGOEdge, "Number of edges in CSPGO.");
  139. STATISTIC(NumOfCSPGOBB, "Number of basic-blocks in CSPGO.");
  140. STATISTIC(NumOfCSPGOSplit, "Number of critical edge splits in CSPGO.");
  141. STATISTIC(NumOfCSPGOFunc,
  142. "Number of functions having valid profile counts in CSPGO.");
  143. STATISTIC(NumOfCSPGOMismatch,
  144. "Number of functions having mismatch profile in CSPGO.");
  145. STATISTIC(NumOfCSPGOMissing, "Number of functions without profile in CSPGO.");
  146. // Command line option to specify the file to read profile from. This is
  147. // mainly used for testing.
  148. static cl::opt<std::string>
  149. PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden,
  150. cl::value_desc("filename"),
  151. cl::desc("Specify the path of profile data file. This is"
  152. "mainly for test purpose."));
  153. static cl::opt<std::string> PGOTestProfileRemappingFile(
  154. "pgo-test-profile-remapping-file", cl::init(""), cl::Hidden,
  155. cl::value_desc("filename"),
  156. cl::desc("Specify the path of profile remapping file. This is mainly for "
  157. "test purpose."));
  158. // Command line option to disable value profiling. The default is false:
  159. // i.e. value profiling is enabled by default. This is for debug purpose.
  160. static cl::opt<bool> DisableValueProfiling("disable-vp", cl::init(false),
  161. cl::Hidden,
  162. cl::desc("Disable Value Profiling"));
  163. // Command line option to set the maximum number of VP annotations to write to
  164. // the metadata for a single indirect call callsite.
  165. static cl::opt<unsigned> MaxNumAnnotations(
  166. "icp-max-annotations", cl::init(3), cl::Hidden, cl::ZeroOrMore,
  167. cl::desc("Max number of annotations for a single indirect "
  168. "call callsite"));
  169. // Command line option to set the maximum number of value annotations
  170. // to write to the metadata for a single memop intrinsic.
  171. static cl::opt<unsigned> MaxNumMemOPAnnotations(
  172. "memop-max-annotations", cl::init(4), cl::Hidden, cl::ZeroOrMore,
  173. cl::desc("Max number of preicise value annotations for a single memop"
  174. "intrinsic"));
  175. // Command line option to control appending FunctionHash to the name of a COMDAT
  176. // function. This is to avoid the hash mismatch caused by the preinliner.
  177. static cl::opt<bool> DoComdatRenaming(
  178. "do-comdat-renaming", cl::init(false), cl::Hidden,
  179. cl::desc("Append function hash to the name of COMDAT function to avoid "
  180. "function hash mismatch due to the preinliner"));
  181. // Command line option to enable/disable the warning about missing profile
  182. // information.
  183. static cl::opt<bool>
  184. PGOWarnMissing("pgo-warn-missing-function", cl::init(false), cl::Hidden,
  185. cl::desc("Use this option to turn on/off "
  186. "warnings about missing profile data for "
  187. "functions."));
  188. // Command line option to enable/disable the warning about a hash mismatch in
  189. // the profile data.
  190. static cl::opt<bool>
  191. NoPGOWarnMismatch("no-pgo-warn-mismatch", cl::init(false), cl::Hidden,
  192. cl::desc("Use this option to turn off/on "
  193. "warnings about profile cfg mismatch."));
  194. // Command line option to enable/disable the warning about a hash mismatch in
  195. // the profile data for Comdat functions, which often turns out to be false
  196. // positive due to the pre-instrumentation inline.
  197. static cl::opt<bool>
  198. NoPGOWarnMismatchComdat("no-pgo-warn-mismatch-comdat", cl::init(true),
  199. cl::Hidden,
  200. cl::desc("The option is used to turn on/off "
  201. "warnings about hash mismatch for comdat "
  202. "functions."));
  203. // Command line option to enable/disable select instruction instrumentation.
  204. static cl::opt<bool>
  205. PGOInstrSelect("pgo-instr-select", cl::init(true), cl::Hidden,
  206. cl::desc("Use this option to turn on/off SELECT "
  207. "instruction instrumentation. "));
  208. // Command line option to turn on CFG dot or text dump of raw profile counts
  209. static cl::opt<PGOViewCountsType> PGOViewRawCounts(
  210. "pgo-view-raw-counts", cl::Hidden,
  211. cl::desc("A boolean option to show CFG dag or text "
  212. "with raw profile counts from "
  213. "profile data. See also option "
  214. "-pgo-view-counts. To limit graph "
  215. "display to only one function, use "
  216. "filtering option -view-bfi-func-name."),
  217. cl::values(clEnumValN(PGOVCT_None, "none", "do not show."),
  218. clEnumValN(PGOVCT_Graph, "graph", "show a graph."),
  219. clEnumValN(PGOVCT_Text, "text", "show in text.")));
  220. // Command line option to enable/disable memop intrinsic call.size profiling.
  221. static cl::opt<bool>
  222. PGOInstrMemOP("pgo-instr-memop", cl::init(true), cl::Hidden,
  223. cl::desc("Use this option to turn on/off "
  224. "memory intrinsic size profiling."));
  225. // Emit branch probability as optimization remarks.
  226. static cl::opt<bool>
  227. EmitBranchProbability("pgo-emit-branch-prob", cl::init(false), cl::Hidden,
  228. cl::desc("When this option is on, the annotated "
  229. "branch probability will be emitted as "
  230. "optimization remarks: -{Rpass|"
  231. "pass-remarks}=pgo-instrumentation"));
  232. // Command line option to turn on CFG dot dump after profile annotation.
  233. // Defined in Analysis/BlockFrequencyInfo.cpp: -pgo-view-counts
  234. extern cl::opt<PGOViewCountsType> PGOViewCounts;
  235. // Command line option to specify the name of the function for CFG dump
  236. // Defined in Analysis/BlockFrequencyInfo.cpp: -view-bfi-func-name=
  237. extern cl::opt<std::string> ViewBlockFreqFuncName;
  238. // Return a string describing the branch condition that can be
  239. // used in static branch probability heuristics:
  240. static std::string getBranchCondString(Instruction *TI) {
  241. BranchInst *BI = dyn_cast<BranchInst>(TI);
  242. if (!BI || !BI->isConditional())
  243. return std::string();
  244. Value *Cond = BI->getCondition();
  245. ICmpInst *CI = dyn_cast<ICmpInst>(Cond);
  246. if (!CI)
  247. return std::string();
  248. std::string result;
  249. raw_string_ostream OS(result);
  250. OS << CmpInst::getPredicateName(CI->getPredicate()) << "_";
  251. CI->getOperand(0)->getType()->print(OS, true);
  252. Value *RHS = CI->getOperand(1);
  253. ConstantInt *CV = dyn_cast<ConstantInt>(RHS);
  254. if (CV) {
  255. if (CV->isZero())
  256. OS << "_Zero";
  257. else if (CV->isOne())
  258. OS << "_One";
  259. else if (CV->isMinusOne())
  260. OS << "_MinusOne";
  261. else
  262. OS << "_Const";
  263. }
  264. OS.flush();
  265. return result;
  266. }
  267. namespace {
  268. /// The select instruction visitor plays three roles specified
  269. /// by the mode. In \c VM_counting mode, it simply counts the number of
  270. /// select instructions. In \c VM_instrument mode, it inserts code to count
  271. /// the number times TrueValue of select is taken. In \c VM_annotate mode,
  272. /// it reads the profile data and annotate the select instruction with metadata.
  273. enum VisitMode { VM_counting, VM_instrument, VM_annotate };
  274. class PGOUseFunc;
  275. /// Instruction Visitor class to visit select instructions.
  276. struct SelectInstVisitor : public InstVisitor<SelectInstVisitor> {
  277. Function &F;
  278. unsigned NSIs = 0; // Number of select instructions instrumented.
  279. VisitMode Mode = VM_counting; // Visiting mode.
  280. unsigned *CurCtrIdx = nullptr; // Pointer to current counter index.
  281. unsigned TotalNumCtrs = 0; // Total number of counters
  282. GlobalVariable *FuncNameVar = nullptr;
  283. uint64_t FuncHash = 0;
  284. PGOUseFunc *UseFunc = nullptr;
  285. SelectInstVisitor(Function &Func) : F(Func) {}
  286. void countSelects(Function &Func) {
  287. NSIs = 0;
  288. Mode = VM_counting;
  289. visit(Func);
  290. }
  291. // Visit the IR stream and instrument all select instructions. \p
  292. // Ind is a pointer to the counter index variable; \p TotalNC
  293. // is the total number of counters; \p FNV is the pointer to the
  294. // PGO function name var; \p FHash is the function hash.
  295. void instrumentSelects(Function &Func, unsigned *Ind, unsigned TotalNC,
  296. GlobalVariable *FNV, uint64_t FHash) {
  297. Mode = VM_instrument;
  298. CurCtrIdx = Ind;
  299. TotalNumCtrs = TotalNC;
  300. FuncHash = FHash;
  301. FuncNameVar = FNV;
  302. visit(Func);
  303. }
  304. // Visit the IR stream and annotate all select instructions.
  305. void annotateSelects(Function &Func, PGOUseFunc *UF, unsigned *Ind) {
  306. Mode = VM_annotate;
  307. UseFunc = UF;
  308. CurCtrIdx = Ind;
  309. visit(Func);
  310. }
  311. void instrumentOneSelectInst(SelectInst &SI);
  312. void annotateOneSelectInst(SelectInst &SI);
  313. // Visit \p SI instruction and perform tasks according to visit mode.
  314. void visitSelectInst(SelectInst &SI);
  315. // Return the number of select instructions. This needs be called after
  316. // countSelects().
  317. unsigned getNumOfSelectInsts() const { return NSIs; }
  318. };
  319. /// Instruction Visitor class to visit memory intrinsic calls.
  320. struct MemIntrinsicVisitor : public InstVisitor<MemIntrinsicVisitor> {
  321. Function &F;
  322. unsigned NMemIs = 0; // Number of memIntrinsics instrumented.
  323. VisitMode Mode = VM_counting; // Visiting mode.
  324. unsigned CurCtrId = 0; // Current counter index.
  325. unsigned TotalNumCtrs = 0; // Total number of counters
  326. GlobalVariable *FuncNameVar = nullptr;
  327. uint64_t FuncHash = 0;
  328. PGOUseFunc *UseFunc = nullptr;
  329. std::vector<Instruction *> Candidates;
  330. MemIntrinsicVisitor(Function &Func) : F(Func) {}
  331. void countMemIntrinsics(Function &Func) {
  332. NMemIs = 0;
  333. Mode = VM_counting;
  334. visit(Func);
  335. }
  336. void instrumentMemIntrinsics(Function &Func, unsigned TotalNC,
  337. GlobalVariable *FNV, uint64_t FHash) {
  338. Mode = VM_instrument;
  339. TotalNumCtrs = TotalNC;
  340. FuncHash = FHash;
  341. FuncNameVar = FNV;
  342. visit(Func);
  343. }
  344. std::vector<Instruction *> findMemIntrinsics(Function &Func) {
  345. Candidates.clear();
  346. Mode = VM_annotate;
  347. visit(Func);
  348. return Candidates;
  349. }
  350. // Visit the IR stream and annotate all mem intrinsic call instructions.
  351. void instrumentOneMemIntrinsic(MemIntrinsic &MI);
  352. // Visit \p MI instruction and perform tasks according to visit mode.
  353. void visitMemIntrinsic(MemIntrinsic &SI);
  354. unsigned getNumOfMemIntrinsics() const { return NMemIs; }
  355. };
  356. class PGOInstrumentationGenLegacyPass : public ModulePass {
  357. public:
  358. static char ID;
  359. PGOInstrumentationGenLegacyPass(bool IsCS = false)
  360. : ModulePass(ID), IsCS(IsCS) {
  361. initializePGOInstrumentationGenLegacyPassPass(
  362. *PassRegistry::getPassRegistry());
  363. }
  364. StringRef getPassName() const override { return "PGOInstrumentationGenPass"; }
  365. private:
  366. // Is this is context-sensitive instrumentation.
  367. bool IsCS;
  368. bool runOnModule(Module &M) override;
  369. void getAnalysisUsage(AnalysisUsage &AU) const override {
  370. AU.addRequired<BlockFrequencyInfoWrapperPass>();
  371. }
  372. };
  373. class PGOInstrumentationUseLegacyPass : public ModulePass {
  374. public:
  375. static char ID;
  376. // Provide the profile filename as the parameter.
  377. PGOInstrumentationUseLegacyPass(std::string Filename = "", bool IsCS = false)
  378. : ModulePass(ID), ProfileFileName(std::move(Filename)), IsCS(IsCS) {
  379. if (!PGOTestProfileFile.empty())
  380. ProfileFileName = PGOTestProfileFile;
  381. initializePGOInstrumentationUseLegacyPassPass(
  382. *PassRegistry::getPassRegistry());
  383. }
  384. StringRef getPassName() const override { return "PGOInstrumentationUsePass"; }
  385. private:
  386. std::string ProfileFileName;
  387. // Is this is context-sensitive instrumentation use.
  388. bool IsCS;
  389. bool runOnModule(Module &M) override;
  390. void getAnalysisUsage(AnalysisUsage &AU) const override {
  391. AU.addRequired<ProfileSummaryInfoWrapperPass>();
  392. AU.addRequired<BlockFrequencyInfoWrapperPass>();
  393. }
  394. };
  395. class PGOInstrumentationGenCreateVarLegacyPass : public ModulePass {
  396. public:
  397. static char ID;
  398. StringRef getPassName() const override {
  399. return "PGOInstrumentationGenCreateVarPass";
  400. }
  401. PGOInstrumentationGenCreateVarLegacyPass(std::string CSInstrName = "")
  402. : ModulePass(ID), InstrProfileOutput(CSInstrName) {
  403. initializePGOInstrumentationGenCreateVarLegacyPassPass(
  404. *PassRegistry::getPassRegistry());
  405. }
  406. private:
  407. bool runOnModule(Module &M) override {
  408. createProfileFileNameVar(M, InstrProfileOutput);
  409. createIRLevelProfileFlagVar(M, true);
  410. return false;
  411. }
  412. std::string InstrProfileOutput;
  413. };
  414. } // end anonymous namespace
  415. char PGOInstrumentationGenLegacyPass::ID = 0;
  416. INITIALIZE_PASS_BEGIN(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
  417. "PGO instrumentation.", false, false)
  418. INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
  419. INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
  420. INITIALIZE_PASS_END(PGOInstrumentationGenLegacyPass, "pgo-instr-gen",
  421. "PGO instrumentation.", false, false)
  422. ModulePass *llvm::createPGOInstrumentationGenLegacyPass(bool IsCS) {
  423. return new PGOInstrumentationGenLegacyPass(IsCS);
  424. }
  425. char PGOInstrumentationUseLegacyPass::ID = 0;
  426. INITIALIZE_PASS_BEGIN(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
  427. "Read PGO instrumentation profile.", false, false)
  428. INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
  429. INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass)
  430. INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
  431. INITIALIZE_PASS_END(PGOInstrumentationUseLegacyPass, "pgo-instr-use",
  432. "Read PGO instrumentation profile.", false, false)
  433. ModulePass *llvm::createPGOInstrumentationUseLegacyPass(StringRef Filename,
  434. bool IsCS) {
  435. return new PGOInstrumentationUseLegacyPass(Filename.str(), IsCS);
  436. }
  437. char PGOInstrumentationGenCreateVarLegacyPass::ID = 0;
  438. INITIALIZE_PASS(PGOInstrumentationGenCreateVarLegacyPass,
  439. "pgo-instr-gen-create-var",
  440. "Create PGO instrumentation version variable for CSPGO.", false,
  441. false)
  442. ModulePass *
  443. llvm::createPGOInstrumentationGenCreateVarLegacyPass(StringRef CSInstrName) {
  444. return new PGOInstrumentationGenCreateVarLegacyPass(CSInstrName);
  445. }
  446. namespace {
  447. /// An MST based instrumentation for PGO
  448. ///
  449. /// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO
  450. /// in the function level.
  451. struct PGOEdge {
  452. // This class implements the CFG edges. Note the CFG can be a multi-graph.
  453. // So there might be multiple edges with same SrcBB and DestBB.
  454. const BasicBlock *SrcBB;
  455. const BasicBlock *DestBB;
  456. uint64_t Weight;
  457. bool InMST = false;
  458. bool Removed = false;
  459. bool IsCritical = false;
  460. PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
  461. : SrcBB(Src), DestBB(Dest), Weight(W) {}
  462. // Return the information string of an edge.
  463. const std::string infoString() const {
  464. return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") +
  465. (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str();
  466. }
  467. };
  468. // This class stores the auxiliary information for each BB.
  469. struct BBInfo {
  470. BBInfo *Group;
  471. uint32_t Index;
  472. uint32_t Rank = 0;
  473. BBInfo(unsigned IX) : Group(this), Index(IX) {}
  474. // Return the information string of this object.
  475. const std::string infoString() const {
  476. return (Twine("Index=") + Twine(Index)).str();
  477. }
  478. // Empty function -- only applicable to UseBBInfo.
  479. void addOutEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
  480. // Empty function -- only applicable to UseBBInfo.
  481. void addInEdge(PGOEdge *E LLVM_ATTRIBUTE_UNUSED) {}
  482. };
  483. // This class implements the CFG edges. Note the CFG can be a multi-graph.
  484. template <class Edge, class BBInfo> class FuncPGOInstrumentation {
  485. private:
  486. Function &F;
  487. // Is this is context-sensitive instrumentation.
  488. bool IsCS;
  489. // A map that stores the Comdat group in function F.
  490. std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers;
  491. void computeCFGHash();
  492. void renameComdatFunction();
  493. public:
  494. std::vector<std::vector<Instruction *>> ValueSites;
  495. SelectInstVisitor SIVisitor;
  496. MemIntrinsicVisitor MIVisitor;
  497. std::string FuncName;
  498. GlobalVariable *FuncNameVar;
  499. // CFG hash value for this function.
  500. uint64_t FunctionHash = 0;
  501. // The Minimum Spanning Tree of function CFG.
  502. CFGMST<Edge, BBInfo> MST;
  503. // Collect all the BBs that will be instrumented, and store them in
  504. // InstrumentBBs.
  505. void getInstrumentBBs(std::vector<BasicBlock *> &InstrumentBBs);
  506. // Give an edge, find the BB that will be instrumented.
  507. // Return nullptr if there is no BB to be instrumented.
  508. BasicBlock *getInstrBB(Edge *E);
  509. // Return the auxiliary BB information.
  510. BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); }
  511. // Return the auxiliary BB information if available.
  512. BBInfo *findBBInfo(const BasicBlock *BB) const { return MST.findBBInfo(BB); }
  513. // Dump edges and BB information.
  514. void dumpInfo(std::string Str = "") const {
  515. MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " +
  516. Twine(FunctionHash) + "\t" + Str);
  517. }
  518. FuncPGOInstrumentation(
  519. Function &Func,
  520. std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
  521. bool CreateGlobalVar = false, BranchProbabilityInfo *BPI = nullptr,
  522. BlockFrequencyInfo *BFI = nullptr, bool IsCS = false)
  523. : F(Func), IsCS(IsCS), ComdatMembers(ComdatMembers),
  524. ValueSites(IPVK_Last + 1), SIVisitor(Func), MIVisitor(Func),
  525. MST(F, BPI, BFI) {
  526. // This should be done before CFG hash computation.
  527. SIVisitor.countSelects(Func);
  528. MIVisitor.countMemIntrinsics(Func);
  529. if (!IsCS) {
  530. NumOfPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
  531. NumOfPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
  532. NumOfPGOBB += MST.BBInfos.size();
  533. ValueSites[IPVK_IndirectCallTarget] = findIndirectCalls(Func);
  534. } else {
  535. NumOfCSPGOSelectInsts += SIVisitor.getNumOfSelectInsts();
  536. NumOfCSPGOMemIntrinsics += MIVisitor.getNumOfMemIntrinsics();
  537. NumOfCSPGOBB += MST.BBInfos.size();
  538. }
  539. ValueSites[IPVK_MemOPSize] = MIVisitor.findMemIntrinsics(Func);
  540. FuncName = getPGOFuncName(F);
  541. computeCFGHash();
  542. if (!ComdatMembers.empty())
  543. renameComdatFunction();
  544. LLVM_DEBUG(dumpInfo("after CFGMST"));
  545. for (auto &E : MST.AllEdges) {
  546. if (E->Removed)
  547. continue;
  548. IsCS ? NumOfCSPGOEdge++ : NumOfPGOEdge++;
  549. if (!E->InMST)
  550. IsCS ? NumOfCSPGOInstrument++ : NumOfPGOInstrument++;
  551. }
  552. if (CreateGlobalVar)
  553. FuncNameVar = createPGOFuncNameVar(F, FuncName);
  554. }
  555. };
  556. } // end anonymous namespace
  557. // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
  558. // value of each BB in the CFG. The higher 32 bits record the number of edges.
  559. template <class Edge, class BBInfo>
  560. void FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash() {
  561. std::vector<char> Indexes;
  562. JamCRC JC;
  563. for (auto &BB : F) {
  564. const Instruction *TI = BB.getTerminator();
  565. for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) {
  566. BasicBlock *Succ = TI->getSuccessor(I);
  567. auto BI = findBBInfo(Succ);
  568. if (BI == nullptr)
  569. continue;
  570. uint32_t Index = BI->Index;
  571. for (int J = 0; J < 4; J++)
  572. Indexes.push_back((char)(Index >> (J * 8)));
  573. }
  574. }
  575. JC.update(Indexes);
  576. // Hash format for context sensitive profile. Reserve 4 bits for other
  577. // information.
  578. FunctionHash = (uint64_t)SIVisitor.getNumOfSelectInsts() << 56 |
  579. (uint64_t)ValueSites[IPVK_IndirectCallTarget].size() << 48 |
  580. //(uint64_t)ValueSites[IPVK_MemOPSize].size() << 40 |
  581. (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC();
  582. // Reserve bit 60-63 for other information purpose.
  583. FunctionHash &= 0x0FFFFFFFFFFFFFFF;
  584. if (IsCS)
  585. NamedInstrProfRecord::setCSFlagInHash(FunctionHash);
  586. LLVM_DEBUG(dbgs() << "Function Hash Computation for " << F.getName() << ":\n"
  587. << " CRC = " << JC.getCRC()
  588. << ", Selects = " << SIVisitor.getNumOfSelectInsts()
  589. << ", Edges = " << MST.AllEdges.size() << ", ICSites = "
  590. << ValueSites[IPVK_IndirectCallTarget].size()
  591. << ", Hash = " << FunctionHash << "\n";);
  592. }
  593. // Check if we can safely rename this Comdat function.
  594. static bool canRenameComdat(
  595. Function &F,
  596. std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
  597. if (!DoComdatRenaming || !canRenameComdatFunc(F, true))
  598. return false;
  599. // FIXME: Current only handle those Comdat groups that only containing one
  600. // function and function aliases.
  601. // (1) For a Comdat group containing multiple functions, we need to have a
  602. // unique postfix based on the hashes for each function. There is a
  603. // non-trivial code refactoring to do this efficiently.
  604. // (2) Variables can not be renamed, so we can not rename Comdat function in a
  605. // group including global vars.
  606. Comdat *C = F.getComdat();
  607. for (auto &&CM : make_range(ComdatMembers.equal_range(C))) {
  608. if (dyn_cast<GlobalAlias>(CM.second))
  609. continue;
  610. Function *FM = dyn_cast<Function>(CM.second);
  611. if (FM != &F)
  612. return false;
  613. }
  614. return true;
  615. }
  616. // Append the CFGHash to the Comdat function name.
  617. template <class Edge, class BBInfo>
  618. void FuncPGOInstrumentation<Edge, BBInfo>::renameComdatFunction() {
  619. if (!canRenameComdat(F, ComdatMembers))
  620. return;
  621. std::string OrigName = F.getName().str();
  622. std::string NewFuncName =
  623. Twine(F.getName() + "." + Twine(FunctionHash)).str();
  624. F.setName(Twine(NewFuncName));
  625. GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigName, &F);
  626. FuncName = Twine(FuncName + "." + Twine(FunctionHash)).str();
  627. Comdat *NewComdat;
  628. Module *M = F.getParent();
  629. // For AvailableExternallyLinkage functions, change the linkage to
  630. // LinkOnceODR and put them into comdat. This is because after renaming, there
  631. // is no backup external copy available for the function.
  632. if (!F.hasComdat()) {
  633. assert(F.getLinkage() == GlobalValue::AvailableExternallyLinkage);
  634. NewComdat = M->getOrInsertComdat(StringRef(NewFuncName));
  635. F.setLinkage(GlobalValue::LinkOnceODRLinkage);
  636. F.setComdat(NewComdat);
  637. return;
  638. }
  639. // This function belongs to a single function Comdat group.
  640. Comdat *OrigComdat = F.getComdat();
  641. std::string NewComdatName =
  642. Twine(OrigComdat->getName() + "." + Twine(FunctionHash)).str();
  643. NewComdat = M->getOrInsertComdat(StringRef(NewComdatName));
  644. NewComdat->setSelectionKind(OrigComdat->getSelectionKind());
  645. for (auto &&CM : make_range(ComdatMembers.equal_range(OrigComdat))) {
  646. if (GlobalAlias *GA = dyn_cast<GlobalAlias>(CM.second)) {
  647. // For aliases, change the name directly.
  648. assert(dyn_cast<Function>(GA->getAliasee()->stripPointerCasts()) == &F);
  649. std::string OrigGAName = GA->getName().str();
  650. GA->setName(Twine(GA->getName() + "." + Twine(FunctionHash)));
  651. GlobalAlias::create(GlobalValue::WeakAnyLinkage, OrigGAName, GA);
  652. continue;
  653. }
  654. // Must be a function.
  655. Function *CF = dyn_cast<Function>(CM.second);
  656. assert(CF);
  657. CF->setComdat(NewComdat);
  658. }
  659. }
  660. // Collect all the BBs that will be instruments and return them in
  661. // InstrumentBBs and setup InEdges/OutEdge for UseBBInfo.
  662. template <class Edge, class BBInfo>
  663. void FuncPGOInstrumentation<Edge, BBInfo>::getInstrumentBBs(
  664. std::vector<BasicBlock *> &InstrumentBBs) {
  665. // Use a worklist as we will update the vector during the iteration.
  666. std::vector<Edge *> EdgeList;
  667. EdgeList.reserve(MST.AllEdges.size());
  668. for (auto &E : MST.AllEdges)
  669. EdgeList.push_back(E.get());
  670. for (auto &E : EdgeList) {
  671. BasicBlock *InstrBB = getInstrBB(E);
  672. if (InstrBB)
  673. InstrumentBBs.push_back(InstrBB);
  674. }
  675. // Set up InEdges/OutEdges for all BBs.
  676. for (auto &E : MST.AllEdges) {
  677. if (E->Removed)
  678. continue;
  679. const BasicBlock *SrcBB = E->SrcBB;
  680. const BasicBlock *DestBB = E->DestBB;
  681. BBInfo &SrcInfo = getBBInfo(SrcBB);
  682. BBInfo &DestInfo = getBBInfo(DestBB);
  683. SrcInfo.addOutEdge(E.get());
  684. DestInfo.addInEdge(E.get());
  685. }
  686. }
  687. // Given a CFG E to be instrumented, find which BB to place the instrumented
  688. // code. The function will split the critical edge if necessary.
  689. template <class Edge, class BBInfo>
  690. BasicBlock *FuncPGOInstrumentation<Edge, BBInfo>::getInstrBB(Edge *E) {
  691. if (E->InMST || E->Removed)
  692. return nullptr;
  693. BasicBlock *SrcBB = const_cast<BasicBlock *>(E->SrcBB);
  694. BasicBlock *DestBB = const_cast<BasicBlock *>(E->DestBB);
  695. // For a fake edge, instrument the real BB.
  696. if (SrcBB == nullptr)
  697. return DestBB;
  698. if (DestBB == nullptr)
  699. return SrcBB;
  700. auto canInstrument = [](BasicBlock *BB) -> BasicBlock * {
  701. // There are basic blocks (such as catchswitch) cannot be instrumented.
  702. // If the returned first insertion point is the end of BB, skip this BB.
  703. if (BB->getFirstInsertionPt() == BB->end())
  704. return nullptr;
  705. return BB;
  706. };
  707. // Instrument the SrcBB if it has a single successor,
  708. // otherwise, the DestBB if this is not a critical edge.
  709. Instruction *TI = SrcBB->getTerminator();
  710. if (TI->getNumSuccessors() <= 1)
  711. return canInstrument(SrcBB);
  712. if (!E->IsCritical)
  713. return canInstrument(DestBB);
  714. unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
  715. BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum);
  716. if (!InstrBB) {
  717. LLVM_DEBUG(
  718. dbgs() << "Fail to split critical edge: not instrument this edge.\n");
  719. return nullptr;
  720. }
  721. // For a critical edge, we have to split. Instrument the newly
  722. // created BB.
  723. IsCS ? NumOfCSPGOSplit++ : NumOfPGOSplit++;
  724. LLVM_DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index
  725. << " --> " << getBBInfo(DestBB).Index << "\n");
  726. // Need to add two new edges. First one: Add new edge of SrcBB->InstrBB.
  727. MST.addEdge(SrcBB, InstrBB, 0);
  728. // Second one: Add new edge of InstrBB->DestBB.
  729. Edge &NewEdge1 = MST.addEdge(InstrBB, DestBB, 0);
  730. NewEdge1.InMST = true;
  731. E->Removed = true;
  732. return canInstrument(InstrBB);
  733. }
  734. // Visit all edge and instrument the edges not in MST, and do value profiling.
  735. // Critical edges will be split.
  736. static void instrumentOneFunc(
  737. Function &F, Module *M, BranchProbabilityInfo *BPI, BlockFrequencyInfo *BFI,
  738. std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
  739. bool IsCS) {
  740. // Split indirectbr critical edges here before computing the MST rather than
  741. // later in getInstrBB() to avoid invalidating it.
  742. SplitIndirectBrCriticalEdges(F, BPI, BFI);
  743. FuncPGOInstrumentation<PGOEdge, BBInfo> FuncInfo(F, ComdatMembers, true, BPI,
  744. BFI, IsCS);
  745. std::vector<BasicBlock *> InstrumentBBs;
  746. FuncInfo.getInstrumentBBs(InstrumentBBs);
  747. unsigned NumCounters =
  748. InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
  749. uint32_t I = 0;
  750. Type *I8PtrTy = Type::getInt8PtrTy(M->getContext());
  751. for (auto *InstrBB : InstrumentBBs) {
  752. IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt());
  753. assert(Builder.GetInsertPoint() != InstrBB->end() &&
  754. "Cannot get the Instrumentation point");
  755. Builder.CreateCall(
  756. Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment),
  757. {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
  758. Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters),
  759. Builder.getInt32(I++)});
  760. }
  761. // Now instrument select instructions:
  762. FuncInfo.SIVisitor.instrumentSelects(F, &I, NumCounters, FuncInfo.FuncNameVar,
  763. FuncInfo.FunctionHash);
  764. assert(I == NumCounters);
  765. if (DisableValueProfiling)
  766. return;
  767. unsigned NumIndirectCalls = 0;
  768. for (auto &I : FuncInfo.ValueSites[IPVK_IndirectCallTarget]) {
  769. CallSite CS(I);
  770. Value *Callee = CS.getCalledValue();
  771. LLVM_DEBUG(dbgs() << "Instrument one indirect call: CallSite Index = "
  772. << NumIndirectCalls << "\n");
  773. IRBuilder<> Builder(I);
  774. assert(Builder.GetInsertPoint() != I->getParent()->end() &&
  775. "Cannot get the Instrumentation point");
  776. Builder.CreateCall(
  777. Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
  778. {ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy),
  779. Builder.getInt64(FuncInfo.FunctionHash),
  780. Builder.CreatePtrToInt(Callee, Builder.getInt64Ty()),
  781. Builder.getInt32(IPVK_IndirectCallTarget),
  782. Builder.getInt32(NumIndirectCalls++)});
  783. }
  784. NumOfPGOICall += NumIndirectCalls;
  785. // Now instrument memop intrinsic calls.
  786. FuncInfo.MIVisitor.instrumentMemIntrinsics(
  787. F, NumCounters, FuncInfo.FuncNameVar, FuncInfo.FunctionHash);
  788. }
  789. namespace {
  790. // This class represents a CFG edge in profile use compilation.
  791. struct PGOUseEdge : public PGOEdge {
  792. bool CountValid = false;
  793. uint64_t CountValue = 0;
  794. PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W = 1)
  795. : PGOEdge(Src, Dest, W) {}
  796. // Set edge count value
  797. void setEdgeCount(uint64_t Value) {
  798. CountValue = Value;
  799. CountValid = true;
  800. }
  801. // Return the information string for this object.
  802. const std::string infoString() const {
  803. if (!CountValid)
  804. return PGOEdge::infoString();
  805. return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue))
  806. .str();
  807. }
  808. };
  809. using DirectEdges = SmallVector<PGOUseEdge *, 2>;
  810. // This class stores the auxiliary information for each BB.
  811. struct UseBBInfo : public BBInfo {
  812. uint64_t CountValue = 0;
  813. bool CountValid;
  814. int32_t UnknownCountInEdge = 0;
  815. int32_t UnknownCountOutEdge = 0;
  816. DirectEdges InEdges;
  817. DirectEdges OutEdges;
  818. UseBBInfo(unsigned IX) : BBInfo(IX), CountValid(false) {}
  819. UseBBInfo(unsigned IX, uint64_t C)
  820. : BBInfo(IX), CountValue(C), CountValid(true) {}
  821. // Set the profile count value for this BB.
  822. void setBBInfoCount(uint64_t Value) {
  823. CountValue = Value;
  824. CountValid = true;
  825. }
  826. // Return the information string of this object.
  827. const std::string infoString() const {
  828. if (!CountValid)
  829. return BBInfo::infoString();
  830. return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str();
  831. }
  832. // Add an OutEdge and update the edge count.
  833. void addOutEdge(PGOUseEdge *E) {
  834. OutEdges.push_back(E);
  835. UnknownCountOutEdge++;
  836. }
  837. // Add an InEdge and update the edge count.
  838. void addInEdge(PGOUseEdge *E) {
  839. InEdges.push_back(E);
  840. UnknownCountInEdge++;
  841. }
  842. };
  843. } // end anonymous namespace
  844. // Sum up the count values for all the edges.
  845. static uint64_t sumEdgeCount(const ArrayRef<PGOUseEdge *> Edges) {
  846. uint64_t Total = 0;
  847. for (auto &E : Edges) {
  848. if (E->Removed)
  849. continue;
  850. Total += E->CountValue;
  851. }
  852. return Total;
  853. }
  854. namespace {
  855. class PGOUseFunc {
  856. public:
  857. PGOUseFunc(Function &Func, Module *Modu,
  858. std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers,
  859. BranchProbabilityInfo *BPI = nullptr,
  860. BlockFrequencyInfo *BFIin = nullptr, bool IsCS = false)
  861. : F(Func), M(Modu), BFI(BFIin),
  862. FuncInfo(Func, ComdatMembers, false, BPI, BFIin, IsCS),
  863. FreqAttr(FFA_Normal), IsCS(IsCS) {}
  864. // Read counts for the instrumented BB from profile.
  865. bool readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros);
  866. // Populate the counts for all BBs.
  867. void populateCounters();
  868. // Set the branch weights based on the count values.
  869. void setBranchWeights();
  870. // Annotate the value profile call sites for all value kind.
  871. void annotateValueSites();
  872. // Annotate the value profile call sites for one value kind.
  873. void annotateValueSites(uint32_t Kind);
  874. // Annotate the irreducible loop header weights.
  875. void annotateIrrLoopHeaderWeights();
  876. // The hotness of the function from the profile count.
  877. enum FuncFreqAttr { FFA_Normal, FFA_Cold, FFA_Hot };
  878. // Return the function hotness from the profile.
  879. FuncFreqAttr getFuncFreqAttr() const { return FreqAttr; }
  880. // Return the function hash.
  881. uint64_t getFuncHash() const { return FuncInfo.FunctionHash; }
  882. // Return the profile record for this function;
  883. InstrProfRecord &getProfileRecord() { return ProfileRecord; }
  884. // Return the auxiliary BB information.
  885. UseBBInfo &getBBInfo(const BasicBlock *BB) const {
  886. return FuncInfo.getBBInfo(BB);
  887. }
  888. // Return the auxiliary BB information if available.
  889. UseBBInfo *findBBInfo(const BasicBlock *BB) const {
  890. return FuncInfo.findBBInfo(BB);
  891. }
  892. Function &getFunc() const { return F; }
  893. void dumpInfo(std::string Str = "") const {
  894. FuncInfo.dumpInfo(Str);
  895. }
  896. uint64_t getProgramMaxCount() const { return ProgramMaxCount; }
  897. private:
  898. Function &F;
  899. Module *M;
  900. BlockFrequencyInfo *BFI;
  901. // This member stores the shared information with class PGOGenFunc.
  902. FuncPGOInstrumentation<PGOUseEdge, UseBBInfo> FuncInfo;
  903. // The maximum count value in the profile. This is only used in PGO use
  904. // compilation.
  905. uint64_t ProgramMaxCount;
  906. // Position of counter that remains to be read.
  907. uint32_t CountPosition = 0;
  908. // Total size of the profile count for this function.
  909. uint32_t ProfileCountSize = 0;
  910. // ProfileRecord for this function.
  911. InstrProfRecord ProfileRecord;
  912. // Function hotness info derived from profile.
  913. FuncFreqAttr FreqAttr;
  914. // Is to use the context sensitive profile.
  915. bool IsCS;
  916. // Find the Instrumented BB and set the value. Return false on error.
  917. bool setInstrumentedCounts(const std::vector<uint64_t> &CountFromProfile);
  918. // Set the edge counter value for the unknown edge -- there should be only
  919. // one unknown edge.
  920. void setEdgeCount(DirectEdges &Edges, uint64_t Value);
  921. // Return FuncName string;
  922. const std::string getFuncName() const { return FuncInfo.FuncName; }
  923. // Set the hot/cold inline hints based on the count values.
  924. // FIXME: This function should be removed once the functionality in
  925. // the inliner is implemented.
  926. void markFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) {
  927. if (ProgramMaxCount == 0)
  928. return;
  929. // Threshold of the hot functions.
  930. const BranchProbability HotFunctionThreshold(1, 100);
  931. // Threshold of the cold functions.
  932. const BranchProbability ColdFunctionThreshold(2, 10000);
  933. if (EntryCount >= HotFunctionThreshold.scale(ProgramMaxCount))
  934. FreqAttr = FFA_Hot;
  935. else if (MaxCount <= ColdFunctionThreshold.scale(ProgramMaxCount))
  936. FreqAttr = FFA_Cold;
  937. }
  938. };
  939. } // end anonymous namespace
  940. // Visit all the edges and assign the count value for the instrumented
  941. // edges and the BB. Return false on error.
  942. bool PGOUseFunc::setInstrumentedCounts(
  943. const std::vector<uint64_t> &CountFromProfile) {
  944. std::vector<BasicBlock *> InstrumentBBs;
  945. FuncInfo.getInstrumentBBs(InstrumentBBs);
  946. unsigned NumCounters =
  947. InstrumentBBs.size() + FuncInfo.SIVisitor.getNumOfSelectInsts();
  948. // The number of counters here should match the number of counters
  949. // in profile. Return if they mismatch.
  950. if (NumCounters != CountFromProfile.size()) {
  951. return false;
  952. }
  953. // Set the profile count to the Instrumented BBs.
  954. uint32_t I = 0;
  955. for (BasicBlock *InstrBB : InstrumentBBs) {
  956. uint64_t CountValue = CountFromProfile[I++];
  957. UseBBInfo &Info = getBBInfo(InstrBB);
  958. Info.setBBInfoCount(CountValue);
  959. }
  960. ProfileCountSize = CountFromProfile.size();
  961. CountPosition = I;
  962. // Set the edge count and update the count of unknown edges for BBs.
  963. auto setEdgeCount = [this](PGOUseEdge *E, uint64_t Value) -> void {
  964. E->setEdgeCount(Value);
  965. this->getBBInfo(E->SrcBB).UnknownCountOutEdge--;
  966. this->getBBInfo(E->DestBB).UnknownCountInEdge--;
  967. };
  968. // Set the profile count the Instrumented edges. There are BBs that not in
  969. // MST but not instrumented. Need to set the edge count value so that we can
  970. // populate the profile counts later.
  971. for (auto &E : FuncInfo.MST.AllEdges) {
  972. if (E->Removed || E->InMST)
  973. continue;
  974. const BasicBlock *SrcBB = E->SrcBB;
  975. UseBBInfo &SrcInfo = getBBInfo(SrcBB);
  976. // If only one out-edge, the edge profile count should be the same as BB
  977. // profile count.
  978. if (SrcInfo.CountValid && SrcInfo.OutEdges.size() == 1)
  979. setEdgeCount(E.get(), SrcInfo.CountValue);
  980. else {
  981. const BasicBlock *DestBB = E->DestBB;
  982. UseBBInfo &DestInfo = getBBInfo(DestBB);
  983. // If only one in-edge, the edge profile count should be the same as BB
  984. // profile count.
  985. if (DestInfo.CountValid && DestInfo.InEdges.size() == 1)
  986. setEdgeCount(E.get(), DestInfo.CountValue);
  987. }
  988. if (E->CountValid)
  989. continue;
  990. // E's count should have been set from profile. If not, this meenas E skips
  991. // the instrumentation. We set the count to 0.
  992. setEdgeCount(E.get(), 0);
  993. }
  994. return true;
  995. }
  996. // Set the count value for the unknown edge. There should be one and only one
  997. // unknown edge in Edges vector.
  998. void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) {
  999. for (auto &E : Edges) {
  1000. if (E->CountValid)
  1001. continue;
  1002. E->setEdgeCount(Value);
  1003. getBBInfo(E->SrcBB).UnknownCountOutEdge--;
  1004. getBBInfo(E->DestBB).UnknownCountInEdge--;
  1005. return;
  1006. }
  1007. llvm_unreachable("Cannot find the unknown count edge");
  1008. }
  1009. // Read the profile from ProfileFileName and assign the value to the
  1010. // instrumented BB and the edges. This function also updates ProgramMaxCount.
  1011. // Return true if the profile are successfully read, and false on errors.
  1012. bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader, bool &AllZeros) {
  1013. auto &Ctx = M->getContext();
  1014. Expected<InstrProfRecord> Result =
  1015. PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash);
  1016. if (Error E = Result.takeError()) {
  1017. handleAllErrors(std::move(E), [&](const InstrProfError &IPE) {
  1018. auto Err = IPE.get();
  1019. bool SkipWarning = false;
  1020. LLVM_DEBUG(dbgs() << "Error in reading profile for Func "
  1021. << FuncInfo.FuncName << ": ");
  1022. if (Err == instrprof_error::unknown_function) {
  1023. IsCS ? NumOfCSPGOMissing++ : NumOfPGOMissing++;
  1024. SkipWarning = !PGOWarnMissing;
  1025. LLVM_DEBUG(dbgs() << "unknown function");
  1026. } else if (Err == instrprof_error::hash_mismatch ||
  1027. Err == instrprof_error::malformed) {
  1028. IsCS ? NumOfCSPGOMismatch++ : NumOfPGOMismatch++;
  1029. SkipWarning =
  1030. NoPGOWarnMismatch ||
  1031. (NoPGOWarnMismatchComdat &&
  1032. (F.hasComdat() ||
  1033. F.getLinkage() == GlobalValue::AvailableExternallyLinkage));
  1034. LLVM_DEBUG(dbgs() << "hash mismatch (skip=" << SkipWarning << ")");
  1035. }
  1036. LLVM_DEBUG(dbgs() << " IsCS=" << IsCS << "\n");
  1037. if (SkipWarning)
  1038. return;
  1039. std::string Msg = IPE.message() + std::string(" ") + F.getName().str() +
  1040. std::string(" Hash = ") +
  1041. std::to_string(FuncInfo.FunctionHash);
  1042. Ctx.diagnose(
  1043. DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning));
  1044. });
  1045. return false;
  1046. }
  1047. ProfileRecord = std::move(Result.get());
  1048. std::vector<uint64_t> &CountFromProfile = ProfileRecord.Counts;
  1049. IsCS ? NumOfCSPGOFunc++ : NumOfPGOFunc++;
  1050. LLVM_DEBUG(dbgs() << CountFromProfile.size() << " counts\n");
  1051. uint64_t ValueSum = 0;
  1052. for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) {
  1053. LLVM_DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n");
  1054. ValueSum += CountFromProfile[I];
  1055. }
  1056. AllZeros = (ValueSum == 0);
  1057. LLVM_DEBUG(dbgs() << "SUM = " << ValueSum << "\n");
  1058. getBBInfo(nullptr).UnknownCountOutEdge = 2;
  1059. getBBInfo(nullptr).UnknownCountInEdge = 2;
  1060. if (!setInstrumentedCounts(CountFromProfile)) {
  1061. LLVM_DEBUG(
  1062. dbgs() << "Inconsistent number of counts, skipping this function");
  1063. Ctx.diagnose(DiagnosticInfoPGOProfile(
  1064. M->getName().data(),
  1065. Twine("Inconsistent number of counts in ") + F.getName().str()
  1066. + Twine(": the profile may be stale or there is a function name collision."),
  1067. DS_Warning));
  1068. return false;
  1069. }
  1070. ProgramMaxCount = PGOReader->getMaximumFunctionCount(IsCS);
  1071. return true;
  1072. }
  1073. // Populate the counters from instrumented BBs to all BBs.
  1074. // In the end of this operation, all BBs should have a valid count value.
  1075. void PGOUseFunc::populateCounters() {
  1076. bool Changes = true;
  1077. unsigned NumPasses = 0;
  1078. while (Changes) {
  1079. NumPasses++;
  1080. Changes = false;
  1081. // For efficient traversal, it's better to start from the end as most
  1082. // of the instrumented edges are at the end.
  1083. for (auto &BB : reverse(F)) {
  1084. UseBBInfo *Count = findBBInfo(&BB);
  1085. if (Count == nullptr)
  1086. continue;
  1087. if (!Count->CountValid) {
  1088. if (Count->UnknownCountOutEdge == 0) {
  1089. Count->CountValue = sumEdgeCount(Count->OutEdges);
  1090. Count->CountValid = true;
  1091. Changes = true;
  1092. } else if (Count->UnknownCountInEdge == 0) {
  1093. Count->CountValue = sumEdgeCount(Count->InEdges);
  1094. Count->CountValid = true;
  1095. Changes = true;
  1096. }
  1097. }
  1098. if (Count->CountValid) {
  1099. if (Count->UnknownCountOutEdge == 1) {
  1100. uint64_t Total = 0;
  1101. uint64_t OutSum = sumEdgeCount(Count->OutEdges);
  1102. // If the one of the successor block can early terminate (no-return),
  1103. // we can end up with situation where out edge sum count is larger as
  1104. // the source BB's count is collected by a post-dominated block.
  1105. if (Count->CountValue > OutSum)
  1106. Total = Count->CountValue - OutSum;
  1107. setEdgeCount(Count->OutEdges, Total);
  1108. Changes = true;
  1109. }
  1110. if (Count->UnknownCountInEdge == 1) {
  1111. uint64_t Total = 0;
  1112. uint64_t InSum = sumEdgeCount(Count->InEdges);
  1113. if (Count->CountValue > InSum)
  1114. Total = Count->CountValue - InSum;
  1115. setEdgeCount(Count->InEdges, Total);
  1116. Changes = true;
  1117. }
  1118. }
  1119. }
  1120. }
  1121. LLVM_DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n");
  1122. #ifndef NDEBUG
  1123. // Assert every BB has a valid counter.
  1124. for (auto &BB : F) {
  1125. auto BI = findBBInfo(&BB);
  1126. if (BI == nullptr)
  1127. continue;
  1128. assert(BI->CountValid && "BB count is not valid");
  1129. }
  1130. #endif
  1131. uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue;
  1132. F.setEntryCount(ProfileCount(FuncEntryCount, Function::PCT_Real));
  1133. uint64_t FuncMaxCount = FuncEntryCount;
  1134. for (auto &BB : F) {
  1135. auto BI = findBBInfo(&BB);
  1136. if (BI == nullptr)
  1137. continue;
  1138. FuncMaxCount = std::max(FuncMaxCount, BI->CountValue);
  1139. }
  1140. markFunctionAttributes(FuncEntryCount, FuncMaxCount);
  1141. // Now annotate select instructions
  1142. FuncInfo.SIVisitor.annotateSelects(F, this, &CountPosition);
  1143. assert(CountPosition == ProfileCountSize);
  1144. LLVM_DEBUG(FuncInfo.dumpInfo("after reading profile."));
  1145. }
  1146. // Assign the scaled count values to the BB with multiple out edges.
  1147. void PGOUseFunc::setBranchWeights() {
  1148. // Generate MD_prof metadata for every branch instruction.
  1149. LLVM_DEBUG(dbgs() << "\nSetting branch weights for func " << F.getName()
  1150. << " IsCS=" << IsCS << "\n");
  1151. for (auto &BB : F) {
  1152. Instruction *TI = BB.getTerminator();
  1153. if (TI->getNumSuccessors() < 2)
  1154. continue;
  1155. if (!(isa<BranchInst>(TI) || isa<SwitchInst>(TI) ||
  1156. isa<IndirectBrInst>(TI)))
  1157. continue;
  1158. if (getBBInfo(&BB).CountValue == 0)
  1159. continue;
  1160. // We have a non-zero Branch BB.
  1161. const UseBBInfo &BBCountInfo = getBBInfo(&BB);
  1162. unsigned Size = BBCountInfo.OutEdges.size();
  1163. SmallVector<uint64_t, 2> EdgeCounts(Size, 0);
  1164. uint64_t MaxCount = 0;
  1165. for (unsigned s = 0; s < Size; s++) {
  1166. const PGOUseEdge *E = BBCountInfo.OutEdges[s];
  1167. const BasicBlock *SrcBB = E->SrcBB;
  1168. const BasicBlock *DestBB = E->DestBB;
  1169. if (DestBB == nullptr)
  1170. continue;
  1171. unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB);
  1172. uint64_t EdgeCount = E->CountValue;
  1173. if (EdgeCount > MaxCount)
  1174. MaxCount = EdgeCount;
  1175. EdgeCounts[SuccNum] = EdgeCount;
  1176. }
  1177. setProfMetadata(M, TI, EdgeCounts, MaxCount);
  1178. }
  1179. }
  1180. static bool isIndirectBrTarget(BasicBlock *BB) {
  1181. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
  1182. if (isa<IndirectBrInst>((*PI)->getTerminator()))
  1183. return true;
  1184. }
  1185. return false;
  1186. }
  1187. void PGOUseFunc::annotateIrrLoopHeaderWeights() {
  1188. LLVM_DEBUG(dbgs() << "\nAnnotating irreducible loop header weights.\n");
  1189. // Find irr loop headers
  1190. for (auto &BB : F) {
  1191. // As a heuristic also annotate indrectbr targets as they have a high chance
  1192. // to become an irreducible loop header after the indirectbr tail
  1193. // duplication.
  1194. if (BFI->isIrrLoopHeader(&BB) || isIndirectBrTarget(&BB)) {
  1195. Instruction *TI = BB.getTerminator();
  1196. const UseBBInfo &BBCountInfo = getBBInfo(&BB);
  1197. setIrrLoopHeaderMetadata(M, TI, BBCountInfo.CountValue);
  1198. }
  1199. }
  1200. }
  1201. void SelectInstVisitor::instrumentOneSelectInst(SelectInst &SI) {
  1202. Module *M = F.getParent();
  1203. IRBuilder<> Builder(&SI);
  1204. Type *Int64Ty = Builder.getInt64Ty();
  1205. Type *I8PtrTy = Builder.getInt8PtrTy();
  1206. auto *Step = Builder.CreateZExt(SI.getCondition(), Int64Ty);
  1207. Builder.CreateCall(
  1208. Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment_step),
  1209. {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
  1210. Builder.getInt64(FuncHash), Builder.getInt32(TotalNumCtrs),
  1211. Builder.getInt32(*CurCtrIdx), Step});
  1212. ++(*CurCtrIdx);
  1213. }
  1214. void SelectInstVisitor::annotateOneSelectInst(SelectInst &SI) {
  1215. std::vector<uint64_t> &CountFromProfile = UseFunc->getProfileRecord().Counts;
  1216. assert(*CurCtrIdx < CountFromProfile.size() &&
  1217. "Out of bound access of counters");
  1218. uint64_t SCounts[2];
  1219. SCounts[0] = CountFromProfile[*CurCtrIdx]; // True count
  1220. ++(*CurCtrIdx);
  1221. uint64_t TotalCount = 0;
  1222. auto BI = UseFunc->findBBInfo(SI.getParent());
  1223. if (BI != nullptr)
  1224. TotalCount = BI->CountValue;
  1225. // False Count
  1226. SCounts[1] = (TotalCount > SCounts[0] ? TotalCount - SCounts[0] : 0);
  1227. uint64_t MaxCount = std::max(SCounts[0], SCounts[1]);
  1228. if (MaxCount)
  1229. setProfMetadata(F.getParent(), &SI, SCounts, MaxCount);
  1230. }
  1231. void SelectInstVisitor::visitSelectInst(SelectInst &SI) {
  1232. if (!PGOInstrSelect)
  1233. return;
  1234. // FIXME: do not handle this yet.
  1235. if (SI.getCondition()->getType()->isVectorTy())
  1236. return;
  1237. switch (Mode) {
  1238. case VM_counting:
  1239. NSIs++;
  1240. return;
  1241. case VM_instrument:
  1242. instrumentOneSelectInst(SI);
  1243. return;
  1244. case VM_annotate:
  1245. annotateOneSelectInst(SI);
  1246. return;
  1247. }
  1248. llvm_unreachable("Unknown visiting mode");
  1249. }
  1250. void MemIntrinsicVisitor::instrumentOneMemIntrinsic(MemIntrinsic &MI) {
  1251. Module *M = F.getParent();
  1252. IRBuilder<> Builder(&MI);
  1253. Type *Int64Ty = Builder.getInt64Ty();
  1254. Type *I8PtrTy = Builder.getInt8PtrTy();
  1255. Value *Length = MI.getLength();
  1256. assert(!isa<ConstantInt>(Length));
  1257. Builder.CreateCall(
  1258. Intrinsic::getDeclaration(M, Intrinsic::instrprof_value_profile),
  1259. {ConstantExpr::getBitCast(FuncNameVar, I8PtrTy),
  1260. Builder.getInt64(FuncHash), Builder.CreateZExtOrTrunc(Length, Int64Ty),
  1261. Builder.getInt32(IPVK_MemOPSize), Builder.getInt32(CurCtrId)});
  1262. ++CurCtrId;
  1263. }
  1264. void MemIntrinsicVisitor::visitMemIntrinsic(MemIntrinsic &MI) {
  1265. if (!PGOInstrMemOP)
  1266. return;
  1267. Value *Length = MI.getLength();
  1268. // Not instrument constant length calls.
  1269. if (dyn_cast<ConstantInt>(Length))
  1270. return;
  1271. switch (Mode) {
  1272. case VM_counting:
  1273. NMemIs++;
  1274. return;
  1275. case VM_instrument:
  1276. instrumentOneMemIntrinsic(MI);
  1277. return;
  1278. case VM_annotate:
  1279. Candidates.push_back(&MI);
  1280. return;
  1281. }
  1282. llvm_unreachable("Unknown visiting mode");
  1283. }
  1284. // Traverse all valuesites and annotate the instructions for all value kind.
  1285. void PGOUseFunc::annotateValueSites() {
  1286. if (DisableValueProfiling)
  1287. return;
  1288. // Create the PGOFuncName meta data.
  1289. createPGOFuncNameMetadata(F, FuncInfo.FuncName);
  1290. for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind)
  1291. annotateValueSites(Kind);
  1292. }
  1293. static const char *ValueProfKindDescr[] = {
  1294. #define VALUE_PROF_KIND(Enumerator, Value, Descr) Descr,
  1295. #include "llvm/ProfileData/InstrProfData.inc"
  1296. };
  1297. // Annotate the instructions for a specific value kind.
  1298. void PGOUseFunc::annotateValueSites(uint32_t Kind) {
  1299. assert(Kind <= IPVK_Last);
  1300. unsigned ValueSiteIndex = 0;
  1301. auto &ValueSites = FuncInfo.ValueSites[Kind];
  1302. unsigned NumValueSites = ProfileRecord.getNumValueSites(Kind);
  1303. if (NumValueSites != ValueSites.size()) {
  1304. auto &Ctx = M->getContext();
  1305. Ctx.diagnose(DiagnosticInfoPGOProfile(
  1306. M->getName().data(),
  1307. Twine("Inconsistent number of value sites for ") +
  1308. Twine(ValueProfKindDescr[Kind]) +
  1309. Twine(" profiling in \"") + F.getName().str() +
  1310. Twine("\", possibly due to the use of a stale profile."),
  1311. DS_Warning));
  1312. return;
  1313. }
  1314. for (auto &I : ValueSites) {
  1315. LLVM_DEBUG(dbgs() << "Read one value site profile (kind = " << Kind
  1316. << "): Index = " << ValueSiteIndex << " out of "
  1317. << NumValueSites << "\n");
  1318. annotateValueSite(*M, *I, ProfileRecord,
  1319. static_cast<InstrProfValueKind>(Kind), ValueSiteIndex,
  1320. Kind == IPVK_MemOPSize ? MaxNumMemOPAnnotations
  1321. : MaxNumAnnotations);
  1322. ValueSiteIndex++;
  1323. }
  1324. }
  1325. // Collect the set of members for each Comdat in module M and store
  1326. // in ComdatMembers.
  1327. static void collectComdatMembers(
  1328. Module &M,
  1329. std::unordered_multimap<Comdat *, GlobalValue *> &ComdatMembers) {
  1330. if (!DoComdatRenaming)
  1331. return;
  1332. for (Function &F : M)
  1333. if (Comdat *C = F.getComdat())
  1334. ComdatMembers.insert(std::make_pair(C, &F));
  1335. for (GlobalVariable &GV : M.globals())
  1336. if (Comdat *C = GV.getComdat())
  1337. ComdatMembers.insert(std::make_pair(C, &GV));
  1338. for (GlobalAlias &GA : M.aliases())
  1339. if (Comdat *C = GA.getComdat())
  1340. ComdatMembers.insert(std::make_pair(C, &GA));
  1341. }
  1342. static bool InstrumentAllFunctions(
  1343. Module &M, function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
  1344. function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
  1345. // For the context-sensitve instrumentation, we should have a separated pass
  1346. // (before LTO/ThinLTO linking) to create these variables.
  1347. if (!IsCS)
  1348. createIRLevelProfileFlagVar(M, /* IsCS */ false);
  1349. std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
  1350. collectComdatMembers(M, ComdatMembers);
  1351. for (auto &F : M) {
  1352. if (F.isDeclaration())
  1353. continue;
  1354. auto *BPI = LookupBPI(F);
  1355. auto *BFI = LookupBFI(F);
  1356. instrumentOneFunc(F, &M, BPI, BFI, ComdatMembers, IsCS);
  1357. }
  1358. return true;
  1359. }
  1360. PreservedAnalyses
  1361. PGOInstrumentationGenCreateVar::run(Module &M, ModuleAnalysisManager &AM) {
  1362. createProfileFileNameVar(M, CSInstrName);
  1363. createIRLevelProfileFlagVar(M, /* IsCS */ true);
  1364. return PreservedAnalyses::all();
  1365. }
  1366. bool PGOInstrumentationGenLegacyPass::runOnModule(Module &M) {
  1367. if (skipModule(M))
  1368. return false;
  1369. auto LookupBPI = [this](Function &F) {
  1370. return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
  1371. };
  1372. auto LookupBFI = [this](Function &F) {
  1373. return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
  1374. };
  1375. return InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS);
  1376. }
  1377. PreservedAnalyses PGOInstrumentationGen::run(Module &M,
  1378. ModuleAnalysisManager &AM) {
  1379. auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  1380. auto LookupBPI = [&FAM](Function &F) {
  1381. return &FAM.getResult<BranchProbabilityAnalysis>(F);
  1382. };
  1383. auto LookupBFI = [&FAM](Function &F) {
  1384. return &FAM.getResult<BlockFrequencyAnalysis>(F);
  1385. };
  1386. if (!InstrumentAllFunctions(M, LookupBPI, LookupBFI, IsCS))
  1387. return PreservedAnalyses::all();
  1388. return PreservedAnalyses::none();
  1389. }
  1390. static bool annotateAllFunctions(
  1391. Module &M, StringRef ProfileFileName, StringRef ProfileRemappingFileName,
  1392. function_ref<BranchProbabilityInfo *(Function &)> LookupBPI,
  1393. function_ref<BlockFrequencyInfo *(Function &)> LookupBFI, bool IsCS) {
  1394. LLVM_DEBUG(dbgs() << "Read in profile counters: ");
  1395. auto &Ctx = M.getContext();
  1396. // Read the counter array from file.
  1397. auto ReaderOrErr =
  1398. IndexedInstrProfReader::create(ProfileFileName, ProfileRemappingFileName);
  1399. if (Error E = ReaderOrErr.takeError()) {
  1400. handleAllErrors(std::move(E), [&](const ErrorInfoBase &EI) {
  1401. Ctx.diagnose(
  1402. DiagnosticInfoPGOProfile(ProfileFileName.data(), EI.message()));
  1403. });
  1404. return false;
  1405. }
  1406. std::unique_ptr<IndexedInstrProfReader> PGOReader =
  1407. std::move(ReaderOrErr.get());
  1408. if (!PGOReader) {
  1409. Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(),
  1410. StringRef("Cannot get PGOReader")));
  1411. return false;
  1412. }
  1413. if (!PGOReader->hasCSIRLevelProfile() && IsCS)
  1414. return false;
  1415. // TODO: might need to change the warning once the clang option is finalized.
  1416. if (!PGOReader->isIRLevelProfile()) {
  1417. Ctx.diagnose(DiagnosticInfoPGOProfile(
  1418. ProfileFileName.data(), "Not an IR level instrumentation profile"));
  1419. return false;
  1420. }
  1421. std::unordered_multimap<Comdat *, GlobalValue *> ComdatMembers;
  1422. collectComdatMembers(M, ComdatMembers);
  1423. std::vector<Function *> HotFunctions;
  1424. std::vector<Function *> ColdFunctions;
  1425. for (auto &F : M) {
  1426. if (F.isDeclaration())
  1427. continue;
  1428. auto *BPI = LookupBPI(F);
  1429. auto *BFI = LookupBFI(F);
  1430. // Split indirectbr critical edges here before computing the MST rather than
  1431. // later in getInstrBB() to avoid invalidating it.
  1432. SplitIndirectBrCriticalEdges(F, BPI, BFI);
  1433. PGOUseFunc Func(F, &M, ComdatMembers, BPI, BFI, IsCS);
  1434. bool AllZeros = false;
  1435. if (!Func.readCounters(PGOReader.get(), AllZeros))
  1436. continue;
  1437. if (AllZeros) {
  1438. F.setEntryCount(ProfileCount(0, Function::PCT_Real));
  1439. if (Func.getProgramMaxCount() != 0)
  1440. ColdFunctions.push_back(&F);
  1441. continue;
  1442. }
  1443. Func.populateCounters();
  1444. Func.setBranchWeights();
  1445. Func.annotateValueSites();
  1446. Func.annotateIrrLoopHeaderWeights();
  1447. PGOUseFunc::FuncFreqAttr FreqAttr = Func.getFuncFreqAttr();
  1448. if (FreqAttr == PGOUseFunc::FFA_Cold)
  1449. ColdFunctions.push_back(&F);
  1450. else if (FreqAttr == PGOUseFunc::FFA_Hot)
  1451. HotFunctions.push_back(&F);
  1452. if (PGOViewCounts != PGOVCT_None &&
  1453. (ViewBlockFreqFuncName.empty() ||
  1454. F.getName().equals(ViewBlockFreqFuncName))) {
  1455. LoopInfo LI{DominatorTree(F)};
  1456. std::unique_ptr<BranchProbabilityInfo> NewBPI =
  1457. std::make_unique<BranchProbabilityInfo>(F, LI);
  1458. std::unique_ptr<BlockFrequencyInfo> NewBFI =
  1459. std::make_unique<BlockFrequencyInfo>(F, *NewBPI, LI);
  1460. if (PGOViewCounts == PGOVCT_Graph)
  1461. NewBFI->view();
  1462. else if (PGOViewCounts == PGOVCT_Text) {
  1463. dbgs() << "pgo-view-counts: " << Func.getFunc().getName() << "\n";
  1464. NewBFI->print(dbgs());
  1465. }
  1466. }
  1467. if (PGOViewRawCounts != PGOVCT_None &&
  1468. (ViewBlockFreqFuncName.empty() ||
  1469. F.getName().equals(ViewBlockFreqFuncName))) {
  1470. if (PGOViewRawCounts == PGOVCT_Graph)
  1471. if (ViewBlockFreqFuncName.empty())
  1472. WriteGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
  1473. else
  1474. ViewGraph(&Func, Twine("PGORawCounts_") + Func.getFunc().getName());
  1475. else if (PGOViewRawCounts == PGOVCT_Text) {
  1476. dbgs() << "pgo-view-raw-counts: " << Func.getFunc().getName() << "\n";
  1477. Func.dumpInfo();
  1478. }
  1479. }
  1480. }
  1481. M.setProfileSummary(PGOReader->getSummary(IsCS).getMD(M.getContext()),
  1482. IsCS ? ProfileSummary::PSK_CSInstr
  1483. : ProfileSummary::PSK_Instr);
  1484. // Set function hotness attribute from the profile.
  1485. // We have to apply these attributes at the end because their presence
  1486. // can affect the BranchProbabilityInfo of any callers, resulting in an
  1487. // inconsistent MST between prof-gen and prof-use.
  1488. for (auto &F : HotFunctions) {
  1489. F->addFnAttr(Attribute::InlineHint);
  1490. LLVM_DEBUG(dbgs() << "Set inline attribute to function: " << F->getName()
  1491. << "\n");
  1492. }
  1493. for (auto &F : ColdFunctions) {
  1494. F->addFnAttr(Attribute::Cold);
  1495. LLVM_DEBUG(dbgs() << "Set cold attribute to function: " << F->getName()
  1496. << "\n");
  1497. }
  1498. return true;
  1499. }
  1500. PGOInstrumentationUse::PGOInstrumentationUse(std::string Filename,
  1501. std::string RemappingFilename,
  1502. bool IsCS)
  1503. : ProfileFileName(std::move(Filename)),
  1504. ProfileRemappingFileName(std::move(RemappingFilename)), IsCS(IsCS) {
  1505. if (!PGOTestProfileFile.empty())
  1506. ProfileFileName = PGOTestProfileFile;
  1507. if (!PGOTestProfileRemappingFile.empty())
  1508. ProfileRemappingFileName = PGOTestProfileRemappingFile;
  1509. }
  1510. PreservedAnalyses PGOInstrumentationUse::run(Module &M,
  1511. ModuleAnalysisManager &AM) {
  1512. auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  1513. auto LookupBPI = [&FAM](Function &F) {
  1514. return &FAM.getResult<BranchProbabilityAnalysis>(F);
  1515. };
  1516. auto LookupBFI = [&FAM](Function &F) {
  1517. return &FAM.getResult<BlockFrequencyAnalysis>(F);
  1518. };
  1519. if (!annotateAllFunctions(M, ProfileFileName, ProfileRemappingFileName,
  1520. LookupBPI, LookupBFI, IsCS))
  1521. return PreservedAnalyses::all();
  1522. return PreservedAnalyses::none();
  1523. }
  1524. bool PGOInstrumentationUseLegacyPass::runOnModule(Module &M) {
  1525. if (skipModule(M))
  1526. return false;
  1527. auto LookupBPI = [this](Function &F) {
  1528. return &this->getAnalysis<BranchProbabilityInfoWrapperPass>(F).getBPI();
  1529. };
  1530. auto LookupBFI = [this](Function &F) {
  1531. return &this->getAnalysis<BlockFrequencyInfoWrapperPass>(F).getBFI();
  1532. };
  1533. return annotateAllFunctions(M, ProfileFileName, "", LookupBPI, LookupBFI,
  1534. IsCS);
  1535. }
  1536. static std::string getSimpleNodeName(const BasicBlock *Node) {
  1537. if (!Node->getName().empty())
  1538. return Node->getName();
  1539. std::string SimpleNodeName;
  1540. raw_string_ostream OS(SimpleNodeName);
  1541. Node->printAsOperand(OS, false);
  1542. return OS.str();
  1543. }
  1544. void llvm::setProfMetadata(Module *M, Instruction *TI,
  1545. ArrayRef<uint64_t> EdgeCounts,
  1546. uint64_t MaxCount) {
  1547. MDBuilder MDB(M->getContext());
  1548. assert(MaxCount > 0 && "Bad max count");
  1549. uint64_t Scale = calculateCountScale(MaxCount);
  1550. SmallVector<unsigned, 4> Weights;
  1551. for (const auto &ECI : EdgeCounts)
  1552. Weights.push_back(scaleBranchCount(ECI, Scale));
  1553. LLVM_DEBUG(dbgs() << "Weight is: "; for (const auto &W
  1554. : Weights) {
  1555. dbgs() << W << " ";
  1556. } dbgs() << "\n";);
  1557. misexpect::verifyMisExpect(TI, Weights, TI->getContext());
  1558. TI->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
  1559. if (EmitBranchProbability) {
  1560. std::string BrCondStr = getBranchCondString(TI);
  1561. if (BrCondStr.empty())
  1562. return;
  1563. uint64_t WSum =
  1564. std::accumulate(Weights.begin(), Weights.end(), (uint64_t)0,
  1565. [](uint64_t w1, uint64_t w2) { return w1 + w2; });
  1566. uint64_t TotalCount =
  1567. std::accumulate(EdgeCounts.begin(), EdgeCounts.end(), (uint64_t)0,
  1568. [](uint64_t c1, uint64_t c2) { return c1 + c2; });
  1569. Scale = calculateCountScale(WSum);
  1570. BranchProbability BP(scaleBranchCount(Weights[0], Scale),
  1571. scaleBranchCount(WSum, Scale));
  1572. std::string BranchProbStr;
  1573. raw_string_ostream OS(BranchProbStr);
  1574. OS << BP;
  1575. OS << " (total count : " << TotalCount << ")";
  1576. OS.flush();
  1577. Function *F = TI->getParent()->getParent();
  1578. OptimizationRemarkEmitter ORE(F);
  1579. ORE.emit([&]() {
  1580. return OptimizationRemark(DEBUG_TYPE, "pgo-instrumentation", TI)
  1581. << BrCondStr << " is true with probability : " << BranchProbStr;
  1582. });
  1583. }
  1584. }
  1585. namespace llvm {
  1586. void setIrrLoopHeaderMetadata(Module *M, Instruction *TI, uint64_t Count) {
  1587. MDBuilder MDB(M->getContext());
  1588. TI->setMetadata(llvm::LLVMContext::MD_irr_loop,
  1589. MDB.createIrrLoopHeaderWeight(Count));
  1590. }
  1591. template <> struct GraphTraits<PGOUseFunc *> {
  1592. using NodeRef = const BasicBlock *;
  1593. using ChildIteratorType = succ_const_iterator;
  1594. using nodes_iterator = pointer_iterator<Function::const_iterator>;
  1595. static NodeRef getEntryNode(const PGOUseFunc *G) {
  1596. return &G->getFunc().front();
  1597. }
  1598. static ChildIteratorType child_begin(const NodeRef N) {
  1599. return succ_begin(N);
  1600. }
  1601. static ChildIteratorType child_end(const NodeRef N) { return succ_end(N); }
  1602. static nodes_iterator nodes_begin(const PGOUseFunc *G) {
  1603. return nodes_iterator(G->getFunc().begin());
  1604. }
  1605. static nodes_iterator nodes_end(const PGOUseFunc *G) {
  1606. return nodes_iterator(G->getFunc().end());
  1607. }
  1608. };
  1609. template <> struct DOTGraphTraits<PGOUseFunc *> : DefaultDOTGraphTraits {
  1610. explicit DOTGraphTraits(bool isSimple = false)
  1611. : DefaultDOTGraphTraits(isSimple) {}
  1612. static std::string getGraphName(const PGOUseFunc *G) {
  1613. return G->getFunc().getName();
  1614. }
  1615. std::string getNodeLabel(const BasicBlock *Node, const PGOUseFunc *Graph) {
  1616. std::string Result;
  1617. raw_string_ostream OS(Result);
  1618. OS << getSimpleNodeName(Node) << ":\\l";
  1619. UseBBInfo *BI = Graph->findBBInfo(Node);
  1620. OS << "Count : ";
  1621. if (BI && BI->CountValid)
  1622. OS << BI->CountValue << "\\l";
  1623. else
  1624. OS << "Unknown\\l";
  1625. if (!PGOInstrSelect)
  1626. return Result;
  1627. for (auto BI = Node->begin(); BI != Node->end(); ++BI) {
  1628. auto *I = &*BI;
  1629. if (!isa<SelectInst>(I))
  1630. continue;
  1631. // Display scaled counts for SELECT instruction:
  1632. OS << "SELECT : { T = ";
  1633. uint64_t TC, FC;
  1634. bool HasProf = I->extractProfMetadata(TC, FC);
  1635. if (!HasProf)
  1636. OS << "Unknown, F = Unknown }\\l";
  1637. else
  1638. OS << TC << ", F = " << FC << " }\\l";
  1639. }
  1640. return Result;
  1641. }
  1642. };
  1643. } // end namespace llvm