PaddingChecker.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357
  1. //=======- PaddingChecker.cpp ------------------------------------*- C++ -*-==//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines a checker that checks for padding that could be
  10. // removed by re-ordering members.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
  14. #include "clang/AST/CharUnits.h"
  15. #include "clang/AST/DeclTemplate.h"
  16. #include "clang/AST/RecordLayout.h"
  17. #include "clang/AST/RecursiveASTVisitor.h"
  18. #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
  19. #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
  20. #include "clang/StaticAnalyzer/Core/Checker.h"
  21. #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
  22. #include "llvm/ADT/SmallString.h"
  23. #include "llvm/Support/MathExtras.h"
  24. #include "llvm/Support/raw_ostream.h"
  25. #include <numeric>
  26. using namespace clang;
  27. using namespace ento;
  28. namespace {
  29. class PaddingChecker : public Checker<check::ASTDecl<TranslationUnitDecl>> {
  30. private:
  31. mutable std::unique_ptr<BugType> PaddingBug;
  32. mutable BugReporter *BR;
  33. public:
  34. int64_t AllowedPad;
  35. void checkASTDecl(const TranslationUnitDecl *TUD, AnalysisManager &MGR,
  36. BugReporter &BRArg) const {
  37. BR = &BRArg;
  38. // The calls to checkAST* from AnalysisConsumer don't
  39. // visit template instantiations or lambda classes. We
  40. // want to visit those, so we make our own RecursiveASTVisitor.
  41. struct LocalVisitor : public RecursiveASTVisitor<LocalVisitor> {
  42. const PaddingChecker *Checker;
  43. bool shouldVisitTemplateInstantiations() const { return true; }
  44. bool shouldVisitImplicitCode() const { return true; }
  45. explicit LocalVisitor(const PaddingChecker *Checker) : Checker(Checker) {}
  46. bool VisitRecordDecl(const RecordDecl *RD) {
  47. Checker->visitRecord(RD);
  48. return true;
  49. }
  50. bool VisitVarDecl(const VarDecl *VD) {
  51. Checker->visitVariable(VD);
  52. return true;
  53. }
  54. // TODO: Visit array new and mallocs for arrays.
  55. };
  56. LocalVisitor visitor(this);
  57. visitor.TraverseDecl(const_cast<TranslationUnitDecl *>(TUD));
  58. }
  59. /// Look for records of overly padded types. If padding *
  60. /// PadMultiplier exceeds AllowedPad, then generate a report.
  61. /// PadMultiplier is used to share code with the array padding
  62. /// checker.
  63. void visitRecord(const RecordDecl *RD, uint64_t PadMultiplier = 1) const {
  64. if (shouldSkipDecl(RD))
  65. return;
  66. // TODO: Figure out why we are going through declarations and not only
  67. // definitions.
  68. if (!(RD = RD->getDefinition()))
  69. return;
  70. // This is the simplest correct case: a class with no fields and one base
  71. // class. Other cases are more complicated because of how the base classes
  72. // & fields might interact, so we don't bother dealing with them.
  73. // TODO: Support other combinations of base classes and fields.
  74. if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD))
  75. if (CXXRD->field_empty() && CXXRD->getNumBases() == 1)
  76. return visitRecord(CXXRD->bases().begin()->getType()->getAsRecordDecl(),
  77. PadMultiplier);
  78. auto &ASTContext = RD->getASTContext();
  79. const ASTRecordLayout &RL = ASTContext.getASTRecordLayout(RD);
  80. assert(llvm::isPowerOf2_64(RL.getAlignment().getQuantity()));
  81. CharUnits BaselinePad = calculateBaselinePad(RD, ASTContext, RL);
  82. if (BaselinePad.isZero())
  83. return;
  84. CharUnits OptimalPad;
  85. SmallVector<const FieldDecl *, 20> OptimalFieldsOrder;
  86. std::tie(OptimalPad, OptimalFieldsOrder) =
  87. calculateOptimalPad(RD, ASTContext, RL);
  88. CharUnits DiffPad = PadMultiplier * (BaselinePad - OptimalPad);
  89. if (DiffPad.getQuantity() <= AllowedPad) {
  90. assert(!DiffPad.isNegative() && "DiffPad should not be negative");
  91. // There is not enough excess padding to trigger a warning.
  92. return;
  93. }
  94. reportRecord(RD, BaselinePad, OptimalPad, OptimalFieldsOrder);
  95. }
  96. /// Look for arrays of overly padded types. If the padding of the
  97. /// array type exceeds AllowedPad, then generate a report.
  98. void visitVariable(const VarDecl *VD) const {
  99. const ArrayType *ArrTy = VD->getType()->getAsArrayTypeUnsafe();
  100. if (ArrTy == nullptr)
  101. return;
  102. uint64_t Elts = 0;
  103. if (const ConstantArrayType *CArrTy = dyn_cast<ConstantArrayType>(ArrTy))
  104. Elts = CArrTy->getSize().getZExtValue();
  105. if (Elts == 0)
  106. return;
  107. const RecordType *RT = ArrTy->getElementType()->getAs<RecordType>();
  108. if (RT == nullptr)
  109. return;
  110. // TODO: Recurse into the fields to see if they have excess padding.
  111. visitRecord(RT->getDecl(), Elts);
  112. }
  113. bool shouldSkipDecl(const RecordDecl *RD) const {
  114. // TODO: Figure out why we are going through declarations and not only
  115. // definitions.
  116. if (!(RD = RD->getDefinition()))
  117. return true;
  118. auto Location = RD->getLocation();
  119. // If the construct doesn't have a source file, then it's not something
  120. // we want to diagnose.
  121. if (!Location.isValid())
  122. return true;
  123. SrcMgr::CharacteristicKind Kind =
  124. BR->getSourceManager().getFileCharacteristic(Location);
  125. // Throw out all records that come from system headers.
  126. if (Kind != SrcMgr::C_User)
  127. return true;
  128. // Not going to attempt to optimize unions.
  129. if (RD->isUnion())
  130. return true;
  131. if (auto *CXXRD = dyn_cast<CXXRecordDecl>(RD)) {
  132. // Tail padding with base classes ends up being very complicated.
  133. // We will skip objects with base classes for now, unless they do not
  134. // have fields.
  135. // TODO: Handle more base class scenarios.
  136. if (!CXXRD->field_empty() && CXXRD->getNumBases() != 0)
  137. return true;
  138. if (CXXRD->field_empty() && CXXRD->getNumBases() != 1)
  139. return true;
  140. // Virtual bases are complicated, skipping those for now.
  141. if (CXXRD->getNumVBases() != 0)
  142. return true;
  143. // Can't layout a template, so skip it. We do still layout the
  144. // instantiations though.
  145. if (CXXRD->getTypeForDecl()->isDependentType())
  146. return true;
  147. if (CXXRD->getTypeForDecl()->isInstantiationDependentType())
  148. return true;
  149. }
  150. // How do you reorder fields if you haven't got any?
  151. else if (RD->field_empty())
  152. return true;
  153. auto IsTrickyField = [](const FieldDecl *FD) -> bool {
  154. // Bitfield layout is hard.
  155. if (FD->isBitField())
  156. return true;
  157. // Variable length arrays are tricky too.
  158. QualType Ty = FD->getType();
  159. if (Ty->isIncompleteArrayType())
  160. return true;
  161. return false;
  162. };
  163. if (std::any_of(RD->field_begin(), RD->field_end(), IsTrickyField))
  164. return true;
  165. return false;
  166. }
  167. static CharUnits calculateBaselinePad(const RecordDecl *RD,
  168. const ASTContext &ASTContext,
  169. const ASTRecordLayout &RL) {
  170. CharUnits PaddingSum;
  171. CharUnits Offset = ASTContext.toCharUnitsFromBits(RL.getFieldOffset(0));
  172. for (const FieldDecl *FD : RD->fields()) {
  173. // This checker only cares about the padded size of the
  174. // field, and not the data size. If the field is a record
  175. // with tail padding, then we won't put that number in our
  176. // total because reordering fields won't fix that problem.
  177. CharUnits FieldSize = ASTContext.getTypeSizeInChars(FD->getType());
  178. auto FieldOffsetBits = RL.getFieldOffset(FD->getFieldIndex());
  179. CharUnits FieldOffset = ASTContext.toCharUnitsFromBits(FieldOffsetBits);
  180. PaddingSum += (FieldOffset - Offset);
  181. Offset = FieldOffset + FieldSize;
  182. }
  183. PaddingSum += RL.getSize() - Offset;
  184. return PaddingSum;
  185. }
  186. /// Optimal padding overview:
  187. /// 1. Find a close approximation to where we can place our first field.
  188. /// This will usually be at offset 0.
  189. /// 2. Try to find the best field that can legally be placed at the current
  190. /// offset.
  191. /// a. "Best" is the largest alignment that is legal, but smallest size.
  192. /// This is to account for overly aligned types.
  193. /// 3. If no fields can fit, pad by rounding the current offset up to the
  194. /// smallest alignment requirement of our fields. Measure and track the
  195. // amount of padding added. Go back to 2.
  196. /// 4. Increment the current offset by the size of the chosen field.
  197. /// 5. Remove the chosen field from the set of future possibilities.
  198. /// 6. Go back to 2 if there are still unplaced fields.
  199. /// 7. Add tail padding by rounding the current offset up to the structure
  200. /// alignment. Track the amount of padding added.
  201. static std::pair<CharUnits, SmallVector<const FieldDecl *, 20>>
  202. calculateOptimalPad(const RecordDecl *RD, const ASTContext &ASTContext,
  203. const ASTRecordLayout &RL) {
  204. struct FieldInfo {
  205. CharUnits Align;
  206. CharUnits Size;
  207. const FieldDecl *Field;
  208. bool operator<(const FieldInfo &RHS) const {
  209. // Order from small alignments to large alignments,
  210. // then large sizes to small sizes.
  211. // then large field indices to small field indices
  212. return std::make_tuple(Align, -Size,
  213. Field ? -static_cast<int>(Field->getFieldIndex())
  214. : 0) <
  215. std::make_tuple(
  216. RHS.Align, -RHS.Size,
  217. RHS.Field ? -static_cast<int>(RHS.Field->getFieldIndex())
  218. : 0);
  219. }
  220. };
  221. SmallVector<FieldInfo, 20> Fields;
  222. auto GatherSizesAndAlignments = [](const FieldDecl *FD) {
  223. FieldInfo RetVal;
  224. RetVal.Field = FD;
  225. auto &Ctx = FD->getASTContext();
  226. std::tie(RetVal.Size, RetVal.Align) =
  227. Ctx.getTypeInfoInChars(FD->getType());
  228. assert(llvm::isPowerOf2_64(RetVal.Align.getQuantity()));
  229. if (auto Max = FD->getMaxAlignment())
  230. RetVal.Align = std::max(Ctx.toCharUnitsFromBits(Max), RetVal.Align);
  231. return RetVal;
  232. };
  233. std::transform(RD->field_begin(), RD->field_end(),
  234. std::back_inserter(Fields), GatherSizesAndAlignments);
  235. llvm::sort(Fields);
  236. // This lets us skip over vptrs and non-virtual bases,
  237. // so that we can just worry about the fields in our object.
  238. // Note that this does cause us to miss some cases where we
  239. // could pack more bytes in to a base class's tail padding.
  240. CharUnits NewOffset = ASTContext.toCharUnitsFromBits(RL.getFieldOffset(0));
  241. CharUnits NewPad;
  242. SmallVector<const FieldDecl *, 20> OptimalFieldsOrder;
  243. while (!Fields.empty()) {
  244. unsigned TrailingZeros =
  245. llvm::countTrailingZeros((unsigned long long)NewOffset.getQuantity());
  246. // If NewOffset is zero, then countTrailingZeros will be 64. Shifting
  247. // 64 will overflow our unsigned long long. Shifting 63 will turn
  248. // our long long (and CharUnits internal type) negative. So shift 62.
  249. long long CurAlignmentBits = 1ull << (std::min)(TrailingZeros, 62u);
  250. CharUnits CurAlignment = CharUnits::fromQuantity(CurAlignmentBits);
  251. FieldInfo InsertPoint = {CurAlignment, CharUnits::Zero(), nullptr};
  252. auto CurBegin = Fields.begin();
  253. auto CurEnd = Fields.end();
  254. // In the typical case, this will find the last element
  255. // of the vector. We won't find a middle element unless
  256. // we started on a poorly aligned address or have an overly
  257. // aligned field.
  258. auto Iter = std::upper_bound(CurBegin, CurEnd, InsertPoint);
  259. if (Iter != CurBegin) {
  260. // We found a field that we can layout with the current alignment.
  261. --Iter;
  262. NewOffset += Iter->Size;
  263. OptimalFieldsOrder.push_back(Iter->Field);
  264. Fields.erase(Iter);
  265. } else {
  266. // We are poorly aligned, and we need to pad in order to layout another
  267. // field. Round up to at least the smallest field alignment that we
  268. // currently have.
  269. CharUnits NextOffset = NewOffset.alignTo(Fields[0].Align);
  270. NewPad += NextOffset - NewOffset;
  271. NewOffset = NextOffset;
  272. }
  273. }
  274. // Calculate tail padding.
  275. CharUnits NewSize = NewOffset.alignTo(RL.getAlignment());
  276. NewPad += NewSize - NewOffset;
  277. return {NewPad, std::move(OptimalFieldsOrder)};
  278. }
  279. void reportRecord(
  280. const RecordDecl *RD, CharUnits BaselinePad, CharUnits OptimalPad,
  281. const SmallVector<const FieldDecl *, 20> &OptimalFieldsOrder) const {
  282. if (!PaddingBug)
  283. PaddingBug =
  284. llvm::make_unique<BugType>(this, "Excessive Padding", "Performance");
  285. SmallString<100> Buf;
  286. llvm::raw_svector_ostream Os(Buf);
  287. Os << "Excessive padding in '";
  288. Os << QualType::getAsString(RD->getTypeForDecl(), Qualifiers(),
  289. LangOptions())
  290. << "'";
  291. if (auto *TSD = dyn_cast<ClassTemplateSpecializationDecl>(RD)) {
  292. // TODO: make this show up better in the console output and in
  293. // the HTML. Maybe just make it show up in HTML like the path
  294. // diagnostics show.
  295. SourceLocation ILoc = TSD->getPointOfInstantiation();
  296. if (ILoc.isValid())
  297. Os << " instantiated here: "
  298. << ILoc.printToString(BR->getSourceManager());
  299. }
  300. Os << " (" << BaselinePad.getQuantity() << " padding bytes, where "
  301. << OptimalPad.getQuantity() << " is optimal). \n"
  302. << "Optimal fields order: \n";
  303. for (const auto *FD : OptimalFieldsOrder)
  304. Os << FD->getName() << ", \n";
  305. Os << "consider reordering the fields or adding explicit padding "
  306. "members.";
  307. PathDiagnosticLocation CELoc =
  308. PathDiagnosticLocation::create(RD, BR->getSourceManager());
  309. auto Report = llvm::make_unique<BugReport>(*PaddingBug, Os.str(), CELoc);
  310. Report->setDeclWithIssue(RD);
  311. Report->addRange(RD->getSourceRange());
  312. BR->emitReport(std::move(Report));
  313. }
  314. };
  315. } // namespace
  316. void ento::registerPaddingChecker(CheckerManager &Mgr) {
  317. auto *Checker = Mgr.registerChecker<PaddingChecker>();
  318. Checker->AllowedPad = Mgr.getAnalyzerOptions()
  319. .getCheckerIntegerOption(Checker, "AllowedPad", 24);
  320. assert(Checker->AllowedPad >= 0 &&
  321. "AllowedPad option should be non-negative");
  322. }
  323. bool ento::shouldRegisterPaddingChecker(const LangOptions &LO) {
  324. return true;
  325. }