GenericTaintChecker.cpp 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786
  1. //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This checker defines the attack surface for generic taint propagation.
  11. //
  12. // The taint information produced by it might be useful to other checkers. For
  13. // example, checkers should report errors which involve tainted data more
  14. // aggressively, even if the involved symbols are under constrained.
  15. //
  16. //===----------------------------------------------------------------------===//
  17. #include "ClangSACheckers.h"
  18. #include "clang/AST/Attr.h"
  19. #include "clang/Basic/Builtins.h"
  20. #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
  21. #include "clang/StaticAnalyzer/Core/Checker.h"
  22. #include "clang/StaticAnalyzer/Core/CheckerManager.h"
  23. #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
  24. #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
  25. #include <climits>
  26. using namespace clang;
  27. using namespace ento;
  28. namespace {
  29. class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
  30. check::PreStmt<CallExpr> > {
  31. public:
  32. static void *getTag() { static int Tag; return &Tag; }
  33. void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
  34. void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
  35. private:
  36. static const unsigned InvalidArgIndex = UINT_MAX;
  37. /// Denotes the return vale.
  38. static const unsigned ReturnValueIndex = UINT_MAX - 1;
  39. mutable std::unique_ptr<BugType> BT;
  40. inline void initBugType() const {
  41. if (!BT)
  42. BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
  43. }
  44. /// \brief Catch taint related bugs. Check if tainted data is passed to a
  45. /// system call etc.
  46. bool checkPre(const CallExpr *CE, CheckerContext &C) const;
  47. /// \brief Add taint sources on a pre-visit.
  48. void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
  49. /// \brief Propagate taint generated at pre-visit.
  50. bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
  51. /// \brief Add taint sources on a post visit.
  52. void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
  53. /// Check if the region the expression evaluates to is the standard input,
  54. /// and thus, is tainted.
  55. static bool isStdin(const Expr *E, CheckerContext &C);
  56. /// \brief Given a pointer argument, return the value it points to.
  57. static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
  58. /// Functions defining the attack surface.
  59. typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
  60. CheckerContext &C) const;
  61. ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
  62. ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
  63. ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
  64. /// Taint the scanned input if the file is tainted.
  65. ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
  66. /// Check for CWE-134: Uncontrolled Format String.
  67. static const char MsgUncontrolledFormatString[];
  68. bool checkUncontrolledFormatString(const CallExpr *CE,
  69. CheckerContext &C) const;
  70. /// Check for:
  71. /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
  72. /// CWE-78, "Failure to Sanitize Data into an OS Command"
  73. static const char MsgSanitizeSystemArgs[];
  74. bool checkSystemCall(const CallExpr *CE, StringRef Name,
  75. CheckerContext &C) const;
  76. /// Check if tainted data is used as a buffer size ins strn.. functions,
  77. /// and allocators.
  78. static const char MsgTaintedBufferSize[];
  79. bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
  80. CheckerContext &C) const;
  81. /// Generate a report if the expression is tainted or points to tainted data.
  82. bool generateReportIfTainted(const Expr *E, const char Msg[],
  83. CheckerContext &C) const;
  84. /// The bug visitor prints a diagnostic message at the location where a given
  85. /// variable was tainted.
  86. class TaintBugVisitor
  87. : public BugReporterVisitorImpl<TaintBugVisitor> {
  88. private:
  89. const SVal V;
  90. public:
  91. TaintBugVisitor(const SVal V) : V(V) {}
  92. void Profile(llvm::FoldingSetNodeID &ID) const override { ID.Add(V); }
  93. std::shared_ptr<PathDiagnosticPiece> VisitNode(const ExplodedNode *N,
  94. const ExplodedNode *PrevN,
  95. BugReporterContext &BRC,
  96. BugReport &BR) override;
  97. };
  98. typedef SmallVector<unsigned, 2> ArgVector;
  99. /// \brief A struct used to specify taint propagation rules for a function.
  100. ///
  101. /// If any of the possible taint source arguments is tainted, all of the
  102. /// destination arguments should also be tainted. Use InvalidArgIndex in the
  103. /// src list to specify that all of the arguments can introduce taint. Use
  104. /// InvalidArgIndex in the dst arguments to signify that all the non-const
  105. /// pointer and reference arguments might be tainted on return. If
  106. /// ReturnValueIndex is added to the dst list, the return value will be
  107. /// tainted.
  108. struct TaintPropagationRule {
  109. /// List of arguments which can be taint sources and should be checked.
  110. ArgVector SrcArgs;
  111. /// List of arguments which should be tainted on function return.
  112. ArgVector DstArgs;
  113. // TODO: Check if using other data structures would be more optimal.
  114. TaintPropagationRule() {}
  115. TaintPropagationRule(unsigned SArg,
  116. unsigned DArg, bool TaintRet = false) {
  117. SrcArgs.push_back(SArg);
  118. DstArgs.push_back(DArg);
  119. if (TaintRet)
  120. DstArgs.push_back(ReturnValueIndex);
  121. }
  122. TaintPropagationRule(unsigned SArg1, unsigned SArg2,
  123. unsigned DArg, bool TaintRet = false) {
  124. SrcArgs.push_back(SArg1);
  125. SrcArgs.push_back(SArg2);
  126. DstArgs.push_back(DArg);
  127. if (TaintRet)
  128. DstArgs.push_back(ReturnValueIndex);
  129. }
  130. /// Get the propagation rule for a given function.
  131. static TaintPropagationRule
  132. getTaintPropagationRule(const FunctionDecl *FDecl,
  133. StringRef Name,
  134. CheckerContext &C);
  135. inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
  136. inline void addDstArg(unsigned A) { DstArgs.push_back(A); }
  137. inline bool isNull() const { return SrcArgs.empty(); }
  138. inline bool isDestinationArgument(unsigned ArgNum) const {
  139. return (std::find(DstArgs.begin(),
  140. DstArgs.end(), ArgNum) != DstArgs.end());
  141. }
  142. static inline bool isTaintedOrPointsToTainted(const Expr *E,
  143. ProgramStateRef State,
  144. CheckerContext &C) {
  145. if (State->isTainted(E, C.getLocationContext()) || isStdin(E, C))
  146. return true;
  147. if (!E->getType().getTypePtr()->isPointerType())
  148. return false;
  149. Optional<SVal> V = getPointedToSVal(C, E);
  150. return (V && State->isTainted(*V));
  151. }
  152. /// \brief Pre-process a function which propagates taint according to the
  153. /// taint rule.
  154. ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
  155. };
  156. };
  157. const unsigned GenericTaintChecker::ReturnValueIndex;
  158. const unsigned GenericTaintChecker::InvalidArgIndex;
  159. const char GenericTaintChecker::MsgUncontrolledFormatString[] =
  160. "Untrusted data is used as a format string "
  161. "(CWE-134: Uncontrolled Format String)";
  162. const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
  163. "Untrusted data is passed to a system call "
  164. "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
  165. const char GenericTaintChecker::MsgTaintedBufferSize[] =
  166. "Untrusted data is used to specify the buffer size "
  167. "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
  168. "character data and the null terminator)";
  169. } // end of anonymous namespace
/// A program-state set used to pass information from a call's pre-visit to
/// the same call's post-visit. Each element is an unsigned integer that is
/// either ReturnValueIndex or the index of a pointer/reference argument whose
/// pointed-to data should be tainted when the call returns.
REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
  175. std::shared_ptr<PathDiagnosticPiece>
  176. GenericTaintChecker::TaintBugVisitor::VisitNode(const ExplodedNode *N,
  177. const ExplodedNode *PrevN, BugReporterContext &BRC, BugReport &BR) {
  178. // Find the ExplodedNode where the taint was first introduced
  179. if (!N->getState()->isTainted(V) || PrevN->getState()->isTainted(V))
  180. return nullptr;
  181. const Stmt *S = PathDiagnosticLocation::getStmt(N);
  182. if (!S)
  183. return nullptr;
  184. const LocationContext *NCtx = N->getLocationContext();
  185. PathDiagnosticLocation L =
  186. PathDiagnosticLocation::createBegin(S, BRC.getSourceManager(), NCtx);
  187. if (!L.isValid() || !L.asLocation().isValid())
  188. return nullptr;
  189. return std::make_shared<PathDiagnosticEventPiece>(
  190. L, "Taint originated here");
  191. }
  192. GenericTaintChecker::TaintPropagationRule
  193. GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
  194. const FunctionDecl *FDecl,
  195. StringRef Name,
  196. CheckerContext &C) {
  197. // TODO: Currently, we might lose precision here: we always mark a return
  198. // value as tainted even if it's just a pointer, pointing to tainted data.
  199. // Check for exact name match for functions without builtin substitutes.
  200. TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
  201. .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
  202. .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
  203. .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
  204. .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
  205. .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
  206. .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
  207. .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
  208. .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
  209. .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
  210. .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
  211. .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
  212. .Case("read", TaintPropagationRule(0, 2, 1, true))
  213. .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
  214. .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
  215. .Case("fgets", TaintPropagationRule(2, 0, true))
  216. .Case("getline", TaintPropagationRule(2, 0))
  217. .Case("getdelim", TaintPropagationRule(3, 0))
  218. .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
  219. .Default(TaintPropagationRule());
  220. if (!Rule.isNull())
  221. return Rule;
  222. // Check if it's one of the memory setting/copying functions.
  223. // This check is specialized but faster then calling isCLibraryFunction.
  224. unsigned BId = 0;
  225. if ( (BId = FDecl->getMemoryFunctionKind()) )
  226. switch(BId) {
  227. case Builtin::BImemcpy:
  228. case Builtin::BImemmove:
  229. case Builtin::BIstrncpy:
  230. case Builtin::BIstrncat:
  231. return TaintPropagationRule(1, 2, 0, true);
  232. case Builtin::BIstrlcpy:
  233. case Builtin::BIstrlcat:
  234. return TaintPropagationRule(1, 2, 0, false);
  235. case Builtin::BIstrndup:
  236. return TaintPropagationRule(0, 1, ReturnValueIndex);
  237. default:
  238. break;
  239. };
  240. // Process all other functions which could be defined as builtins.
  241. if (Rule.isNull()) {
  242. if (C.isCLibraryFunction(FDecl, "snprintf") ||
  243. C.isCLibraryFunction(FDecl, "sprintf"))
  244. return TaintPropagationRule(InvalidArgIndex, 0, true);
  245. else if (C.isCLibraryFunction(FDecl, "strcpy") ||
  246. C.isCLibraryFunction(FDecl, "stpcpy") ||
  247. C.isCLibraryFunction(FDecl, "strcat"))
  248. return TaintPropagationRule(1, 0, true);
  249. else if (C.isCLibraryFunction(FDecl, "bcopy"))
  250. return TaintPropagationRule(0, 2, 1, false);
  251. else if (C.isCLibraryFunction(FDecl, "strdup") ||
  252. C.isCLibraryFunction(FDecl, "strdupa"))
  253. return TaintPropagationRule(0, ReturnValueIndex);
  254. else if (C.isCLibraryFunction(FDecl, "wcsdup"))
  255. return TaintPropagationRule(0, ReturnValueIndex);
  256. }
  257. // Skipping the following functions, since they might be used for cleansing
  258. // or smart memory copy:
  259. // - memccpy - copying until hitting a special character.
  260. return TaintPropagationRule();
  261. }
  262. void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
  263. CheckerContext &C) const {
  264. // Check for errors first.
  265. if (checkPre(CE, C))
  266. return;
  267. // Add taint second.
  268. addSourcesPre(CE, C);
  269. }
  270. void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
  271. CheckerContext &C) const {
  272. if (propagateFromPre(CE, C))
  273. return;
  274. addSourcesPost(CE, C);
  275. }
  276. void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
  277. CheckerContext &C) const {
  278. ProgramStateRef State = nullptr;
  279. const FunctionDecl *FDecl = C.getCalleeDecl(CE);
  280. if (!FDecl || FDecl->getKind() != Decl::Function)
  281. return;
  282. StringRef Name = C.getCalleeName(FDecl);
  283. if (Name.empty())
  284. return;
  285. // First, try generating a propagation rule for this function.
  286. TaintPropagationRule Rule =
  287. TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
  288. if (!Rule.isNull()) {
  289. State = Rule.process(CE, C);
  290. if (!State)
  291. return;
  292. C.addTransition(State);
  293. return;
  294. }
  295. // Otherwise, check if we have custom pre-processing implemented.
  296. FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
  297. .Case("fscanf", &GenericTaintChecker::preFscanf)
  298. .Default(nullptr);
  299. // Check and evaluate the call.
  300. if (evalFunction)
  301. State = (this->*evalFunction)(CE, C);
  302. if (!State)
  303. return;
  304. C.addTransition(State);
  305. }
  306. bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
  307. CheckerContext &C) const {
  308. ProgramStateRef State = C.getState();
  309. // Depending on what was tainted at pre-visit, we determined a set of
  310. // arguments which should be tainted after the function returns. These are
  311. // stored in the state as TaintArgsOnPostVisit set.
  312. TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
  313. if (TaintArgs.isEmpty())
  314. return false;
  315. for (llvm::ImmutableSet<unsigned>::iterator
  316. I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
  317. unsigned ArgNum = *I;
  318. // Special handling for the tainted return value.
  319. if (ArgNum == ReturnValueIndex) {
  320. State = State->addTaint(CE, C.getLocationContext());
  321. continue;
  322. }
  323. // The arguments are pointer arguments. The data they are pointing at is
  324. // tainted after the call.
  325. if (CE->getNumArgs() < (ArgNum + 1))
  326. return false;
  327. const Expr* Arg = CE->getArg(ArgNum);
  328. Optional<SVal> V = getPointedToSVal(C, Arg);
  329. if (V)
  330. State = State->addTaint(*V);
  331. }
  332. // Clear up the taint info from the state.
  333. State = State->remove<TaintArgsOnPostVisit>();
  334. if (State != C.getState()) {
  335. C.addTransition(State);
  336. return true;
  337. }
  338. return false;
  339. }
  340. void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
  341. CheckerContext &C) const {
  342. // Define the attack surface.
  343. // Set the evaluation function by switching on the callee name.
  344. const FunctionDecl *FDecl = C.getCalleeDecl(CE);
  345. if (!FDecl || FDecl->getKind() != Decl::Function)
  346. return;
  347. StringRef Name = C.getCalleeName(FDecl);
  348. if (Name.empty())
  349. return;
  350. FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
  351. .Case("scanf", &GenericTaintChecker::postScanf)
  352. // TODO: Add support for vfscanf & family.
  353. .Case("getchar", &GenericTaintChecker::postRetTaint)
  354. .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
  355. .Case("getenv", &GenericTaintChecker::postRetTaint)
  356. .Case("fopen", &GenericTaintChecker::postRetTaint)
  357. .Case("fdopen", &GenericTaintChecker::postRetTaint)
  358. .Case("freopen", &GenericTaintChecker::postRetTaint)
  359. .Case("getch", &GenericTaintChecker::postRetTaint)
  360. .Case("wgetch", &GenericTaintChecker::postRetTaint)
  361. .Case("socket", &GenericTaintChecker::postSocket)
  362. .Default(nullptr);
  363. // If the callee isn't defined, it is not of security concern.
  364. // Check and evaluate the call.
  365. ProgramStateRef State = nullptr;
  366. if (evalFunction)
  367. State = (this->*evalFunction)(CE, C);
  368. if (!State)
  369. return;
  370. C.addTransition(State);
  371. }
  372. bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
  373. if (checkUncontrolledFormatString(CE, C))
  374. return true;
  375. const FunctionDecl *FDecl = C.getCalleeDecl(CE);
  376. if (!FDecl || FDecl->getKind() != Decl::Function)
  377. return false;
  378. StringRef Name = C.getCalleeName(FDecl);
  379. if (Name.empty())
  380. return false;
  381. if (checkSystemCall(CE, Name, C))
  382. return true;
  383. if (checkTaintedBufferSize(CE, FDecl, C))
  384. return true;
  385. return false;
  386. }
  387. Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
  388. const Expr *Arg) {
  389. ProgramStateRef State = C.getState();
  390. SVal AddrVal = C.getSVal(Arg->IgnoreParens());
  391. if (AddrVal.isUnknownOrUndef())
  392. return None;
  393. Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
  394. if (!AddrLoc)
  395. return None;
  396. QualType ArgTy = Arg->getType().getCanonicalType();
  397. if (!ArgTy->isPointerType())
  398. return None;
  399. QualType ValTy = ArgTy->getPointeeType();
  400. // Do not dereference void pointers. Treat them as byte pointers instead.
  401. // FIXME: we might want to consider more than just the first byte.
  402. if (ValTy->isVoidType())
  403. ValTy = C.getASTContext().CharTy;
  404. return State->getSVal(*AddrLoc, ValTy);
  405. }
  406. ProgramStateRef
  407. GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
  408. CheckerContext &C) const {
  409. ProgramStateRef State = C.getState();
  410. // Check for taint in arguments.
  411. bool IsTainted = false;
  412. for (ArgVector::const_iterator I = SrcArgs.begin(),
  413. E = SrcArgs.end(); I != E; ++I) {
  414. unsigned ArgNum = *I;
  415. if (ArgNum == InvalidArgIndex) {
  416. // Check if any of the arguments is tainted, but skip the
  417. // destination arguments.
  418. for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
  419. if (isDestinationArgument(i))
  420. continue;
  421. if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
  422. break;
  423. }
  424. break;
  425. }
  426. if (CE->getNumArgs() < (ArgNum + 1))
  427. return State;
  428. if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
  429. break;
  430. }
  431. if (!IsTainted)
  432. return State;
  433. // Mark the arguments which should be tainted after the function returns.
  434. for (ArgVector::const_iterator I = DstArgs.begin(),
  435. E = DstArgs.end(); I != E; ++I) {
  436. unsigned ArgNum = *I;
  437. // Should we mark all arguments as tainted?
  438. if (ArgNum == InvalidArgIndex) {
  439. // For all pointer and references that were passed in:
  440. // If they are not pointing to const data, mark data as tainted.
  441. // TODO: So far we are just going one level down; ideally we'd need to
  442. // recurse here.
  443. for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
  444. const Expr *Arg = CE->getArg(i);
  445. // Process pointer argument.
  446. const Type *ArgTy = Arg->getType().getTypePtr();
  447. QualType PType = ArgTy->getPointeeType();
  448. if ((!PType.isNull() && !PType.isConstQualified())
  449. || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
  450. State = State->add<TaintArgsOnPostVisit>(i);
  451. }
  452. continue;
  453. }
  454. // Should mark the return value?
  455. if (ArgNum == ReturnValueIndex) {
  456. State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
  457. continue;
  458. }
  459. // Mark the given argument.
  460. assert(ArgNum < CE->getNumArgs());
  461. State = State->add<TaintArgsOnPostVisit>(ArgNum);
  462. }
  463. return State;
  464. }
  465. // If argument 0 (file descriptor) is tainted, all arguments except for arg 0
  466. // and arg 1 should get taint.
  467. ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
  468. CheckerContext &C) const {
  469. assert(CE->getNumArgs() >= 2);
  470. ProgramStateRef State = C.getState();
  471. // Check is the file descriptor is tainted.
  472. if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
  473. isStdin(CE->getArg(0), C)) {
  474. // All arguments except for the first two should get taint.
  475. for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
  476. State = State->add<TaintArgsOnPostVisit>(i);
  477. return State;
  478. }
  479. return nullptr;
  480. }
  481. // If argument 0(protocol domain) is network, the return value should get taint.
  482. ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
  483. CheckerContext &C) const {
  484. ProgramStateRef State = C.getState();
  485. if (CE->getNumArgs() < 3)
  486. return State;
  487. SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
  488. StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
  489. // White list the internal communication protocols.
  490. if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
  491. DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
  492. return State;
  493. State = State->addTaint(CE, C.getLocationContext());
  494. return State;
  495. }
  496. ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
  497. CheckerContext &C) const {
  498. ProgramStateRef State = C.getState();
  499. if (CE->getNumArgs() < 2)
  500. return State;
  501. // All arguments except for the very first one should get taint.
  502. for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
  503. // The arguments are pointer arguments. The data they are pointing at is
  504. // tainted after the call.
  505. const Expr* Arg = CE->getArg(i);
  506. Optional<SVal> V = getPointedToSVal(C, Arg);
  507. if (V)
  508. State = State->addTaint(*V);
  509. }
  510. return State;
  511. }
  512. ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
  513. CheckerContext &C) const {
  514. return C.getState()->addTaint(CE, C.getLocationContext());
  515. }
  516. bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
  517. ProgramStateRef State = C.getState();
  518. SVal Val = C.getSVal(E);
  519. // stdin is a pointer, so it would be a region.
  520. const MemRegion *MemReg = Val.getAsRegion();
  521. // The region should be symbolic, we do not know it's value.
  522. const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
  523. if (!SymReg)
  524. return false;
  525. // Get it's symbol and find the declaration region it's pointing to.
  526. const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
  527. if (!Sm)
  528. return false;
  529. const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
  530. if (!DeclReg)
  531. return false;
  532. // This region corresponds to a declaration, find out if it's a global/extern
  533. // variable named stdin with the proper type.
  534. if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
  535. D = D->getCanonicalDecl();
  536. if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
  537. if (const PointerType * PtrTy =
  538. dyn_cast<PointerType>(D->getType().getTypePtr()))
  539. if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
  540. return true;
  541. }
  542. return false;
  543. }
  544. static bool getPrintfFormatArgumentNum(const CallExpr *CE,
  545. const CheckerContext &C,
  546. unsigned int &ArgNum) {
  547. // Find if the function contains a format string argument.
  548. // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
  549. // vsnprintf, syslog, custom annotated functions.
  550. const FunctionDecl *FDecl = C.getCalleeDecl(CE);
  551. if (!FDecl)
  552. return false;
  553. for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
  554. ArgNum = Format->getFormatIdx() - 1;
  555. if ((Format->getType()->getName() == "printf") &&
  556. CE->getNumArgs() > ArgNum)
  557. return true;
  558. }
  559. // Or if a function is named setproctitle (this is a heuristic).
  560. if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
  561. ArgNum = 0;
  562. return true;
  563. }
  564. return false;
  565. }
  566. bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
  567. const char Msg[],
  568. CheckerContext &C) const {
  569. assert(E);
  570. // Check for taint.
  571. ProgramStateRef State = C.getState();
  572. Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
  573. SVal TaintedSVal;
  574. if (PointedToSVal && State->isTainted(*PointedToSVal))
  575. TaintedSVal = *PointedToSVal;
  576. else if (State->isTainted(E, C.getLocationContext()))
  577. TaintedSVal = C.getSVal(E);
  578. else
  579. return false;
  580. // Generate diagnostic.
  581. if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
  582. initBugType();
  583. auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
  584. report->addRange(E->getSourceRange());
  585. report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal));
  586. C.emitReport(std::move(report));
  587. return true;
  588. }
  589. return false;
  590. }
  591. bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
  592. CheckerContext &C) const{
  593. // Check if the function contains a format string argument.
  594. unsigned int ArgNum = 0;
  595. if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
  596. return false;
  597. // If either the format string content or the pointer itself are tainted, warn.
  598. return generateReportIfTainted(CE->getArg(ArgNum),
  599. MsgUncontrolledFormatString, C);
  600. }
  601. bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
  602. StringRef Name,
  603. CheckerContext &C) const {
  604. // TODO: It might make sense to run this check on demand. In some cases,
  605. // we should check if the environment has been cleansed here. We also might
  606. // need to know if the user was reset before these calls(seteuid).
  607. unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
  608. .Case("system", 0)
  609. .Case("popen", 0)
  610. .Case("execl", 0)
  611. .Case("execle", 0)
  612. .Case("execlp", 0)
  613. .Case("execv", 0)
  614. .Case("execvp", 0)
  615. .Case("execvP", 0)
  616. .Case("execve", 0)
  617. .Case("dlopen", 0)
  618. .Default(UINT_MAX);
  619. if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
  620. return false;
  621. return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
  622. }
  623. // TODO: Should this check be a part of the CString checker?
  624. // If yes, should taint be a global setting?
  625. bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
  626. const FunctionDecl *FDecl,
  627. CheckerContext &C) const {
  628. // If the function has a buffer size argument, set ArgNum.
  629. unsigned ArgNum = InvalidArgIndex;
  630. unsigned BId = 0;
  631. if ( (BId = FDecl->getMemoryFunctionKind()) )
  632. switch(BId) {
  633. case Builtin::BImemcpy:
  634. case Builtin::BImemmove:
  635. case Builtin::BIstrncpy:
  636. ArgNum = 2;
  637. break;
  638. case Builtin::BIstrndup:
  639. ArgNum = 1;
  640. break;
  641. default:
  642. break;
  643. };
  644. if (ArgNum == InvalidArgIndex) {
  645. if (C.isCLibraryFunction(FDecl, "malloc") ||
  646. C.isCLibraryFunction(FDecl, "calloc") ||
  647. C.isCLibraryFunction(FDecl, "alloca"))
  648. ArgNum = 0;
  649. else if (C.isCLibraryFunction(FDecl, "memccpy"))
  650. ArgNum = 3;
  651. else if (C.isCLibraryFunction(FDecl, "realloc"))
  652. ArgNum = 1;
  653. else if (C.isCLibraryFunction(FDecl, "bcopy"))
  654. ArgNum = 2;
  655. }
  656. return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
  657. generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
  658. }
/// Registers GenericTaintChecker with the given checker manager.
void ento::registerGenericTaintChecker(CheckerManager &mgr) {
  mgr.registerChecker<GenericTaintChecker>();
}