GenericTaintChecker.cpp 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740
  1. //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This checker defines the attack surface for generic taint propagation.
  11. //
  12. // The taint information produced by it might be useful to other checkers. For
  13. // example, checkers should report errors which involve tainted data more
  14. // aggressively, even if the involved symbols are under constrained.
  15. //
  16. //===----------------------------------------------------------------------===//
  17. #include "ClangSACheckers.h"
  18. #include "clang/AST/Attr.h"
  19. #include "clang/Basic/Builtins.h"
  20. #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
  21. #include "clang/StaticAnalyzer/Core/Checker.h"
  22. #include "clang/StaticAnalyzer/Core/CheckerManager.h"
  23. #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
  24. #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
  25. #include <climits>
  26. using namespace clang;
  27. using namespace ento;
  28. namespace {
  29. class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
  30. check::PreStmt<CallExpr> > {
  31. public:
  32. static void *getTag() { static int Tag; return &Tag; }
  33. void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
  34. void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
  35. private:
  36. static const unsigned InvalidArgIndex = UINT_MAX;
  37. /// Denotes the return vale.
  38. static const unsigned ReturnValueIndex = UINT_MAX - 1;
  39. mutable std::unique_ptr<BugType> BT;
  40. inline void initBugType() const {
  41. if (!BT)
  42. BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
  43. }
  44. /// \brief Catch taint related bugs. Check if tainted data is passed to a
  45. /// system call etc.
  46. bool checkPre(const CallExpr *CE, CheckerContext &C) const;
  47. /// \brief Add taint sources on a pre-visit.
  48. void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
  49. /// \brief Propagate taint generated at pre-visit.
  50. bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
  51. /// \brief Add taint sources on a post visit.
  52. void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
  53. /// Check if the region the expression evaluates to is the standard input,
  54. /// and thus, is tainted.
  55. static bool isStdin(const Expr *E, CheckerContext &C);
  56. /// \brief Given a pointer argument, get the symbol of the value it contains
  57. /// (points to).
  58. static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
  59. /// Functions defining the attack surface.
  60. typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
  61. CheckerContext &C) const;
  62. ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
  63. ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
  64. ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
  65. /// Taint the scanned input if the file is tainted.
  66. ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
  67. /// Check for CWE-134: Uncontrolled Format String.
  68. static const char MsgUncontrolledFormatString[];
  69. bool checkUncontrolledFormatString(const CallExpr *CE,
  70. CheckerContext &C) const;
  71. /// Check for:
  72. /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
  73. /// CWE-78, "Failure to Sanitize Data into an OS Command"
  74. static const char MsgSanitizeSystemArgs[];
  75. bool checkSystemCall(const CallExpr *CE, StringRef Name,
  76. CheckerContext &C) const;
  77. /// Check if tainted data is used as a buffer size ins strn.. functions,
  78. /// and allocators.
  79. static const char MsgTaintedBufferSize[];
  80. bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
  81. CheckerContext &C) const;
  82. /// Generate a report if the expression is tainted or points to tainted data.
  83. bool generateReportIfTainted(const Expr *E, const char Msg[],
  84. CheckerContext &C) const;
  85. typedef SmallVector<unsigned, 2> ArgVector;
  86. /// \brief A struct used to specify taint propagation rules for a function.
  87. ///
  88. /// If any of the possible taint source arguments is tainted, all of the
  89. /// destination arguments should also be tainted. Use InvalidArgIndex in the
  90. /// src list to specify that all of the arguments can introduce taint. Use
  91. /// InvalidArgIndex in the dst arguments to signify that all the non-const
  92. /// pointer and reference arguments might be tainted on return. If
  93. /// ReturnValueIndex is added to the dst list, the return value will be
  94. /// tainted.
  95. struct TaintPropagationRule {
  96. /// List of arguments which can be taint sources and should be checked.
  97. ArgVector SrcArgs;
  98. /// List of arguments which should be tainted on function return.
  99. ArgVector DstArgs;
  100. // TODO: Check if using other data structures would be more optimal.
  101. TaintPropagationRule() {}
  102. TaintPropagationRule(unsigned SArg,
  103. unsigned DArg, bool TaintRet = false) {
  104. SrcArgs.push_back(SArg);
  105. DstArgs.push_back(DArg);
  106. if (TaintRet)
  107. DstArgs.push_back(ReturnValueIndex);
  108. }
  109. TaintPropagationRule(unsigned SArg1, unsigned SArg2,
  110. unsigned DArg, bool TaintRet = false) {
  111. SrcArgs.push_back(SArg1);
  112. SrcArgs.push_back(SArg2);
  113. DstArgs.push_back(DArg);
  114. if (TaintRet)
  115. DstArgs.push_back(ReturnValueIndex);
  116. }
  117. /// Get the propagation rule for a given function.
  118. static TaintPropagationRule
  119. getTaintPropagationRule(const FunctionDecl *FDecl,
  120. StringRef Name,
  121. CheckerContext &C);
  122. inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
  123. inline void addDstArg(unsigned A) { DstArgs.push_back(A); }
  124. inline bool isNull() const { return SrcArgs.empty(); }
  125. inline bool isDestinationArgument(unsigned ArgNum) const {
  126. return (std::find(DstArgs.begin(),
  127. DstArgs.end(), ArgNum) != DstArgs.end());
  128. }
  129. static inline bool isTaintedOrPointsToTainted(const Expr *E,
  130. ProgramStateRef State,
  131. CheckerContext &C) {
  132. return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
  133. (E->getType().getTypePtr()->isPointerType() &&
  134. State->isTainted(getPointedToSymbol(C, E))));
  135. }
  136. /// \brief Pre-process a function which propagates taint according to the
  137. /// taint rule.
  138. ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
  139. };
  140. };
// Out-of-class definitions of the static index constants that are declared
// and initialized inside GenericTaintChecker.
const unsigned GenericTaintChecker::ReturnValueIndex;
const unsigned GenericTaintChecker::InvalidArgIndex;
// Diagnostic text passed to generateReportIfTainted() by
// checkUncontrolledFormatString().
const char GenericTaintChecker::MsgUncontrolledFormatString[] =
  "Untrusted data is used as a format string "
  "(CWE-134: Uncontrolled Format String)";
// Diagnostic text passed to generateReportIfTainted() by checkSystemCall().
const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
  "Untrusted data is passed to a system call "
  "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
// Diagnostic text passed to generateReportIfTainted() by
// checkTaintedBufferSize().
const char GenericTaintChecker::MsgTaintedBufferSize[] =
  "Untrusted data is used to specify the buffer size "
  "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
  "character data and the null terminator)";
  153. } // end of anonymous namespace
/// A set which is used to pass information from the call pre-visit to the
/// call post-visit. The values are unsigned integers, each of which is either
/// ReturnValueIndex or the index of a pointer/reference argument whose
/// pointed-to data should be tainted on return. The set is filled by
/// TaintPropagationRule::process() and preFscanf() and consumed by
/// propagateFromPre().
REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
  159. GenericTaintChecker::TaintPropagationRule
  160. GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
  161. const FunctionDecl *FDecl,
  162. StringRef Name,
  163. CheckerContext &C) {
  164. // TODO: Currently, we might lose precision here: we always mark a return
  165. // value as tainted even if it's just a pointer, pointing to tainted data.
  166. // Check for exact name match for functions without builtin substitutes.
  167. TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
  168. .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
  169. .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
  170. .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
  171. .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
  172. .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
  173. .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
  174. .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
  175. .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
  176. .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
  177. .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
  178. .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
  179. .Case("read", TaintPropagationRule(0, 2, 1, true))
  180. .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
  181. .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
  182. .Case("fgets", TaintPropagationRule(2, 0, true))
  183. .Case("getline", TaintPropagationRule(2, 0))
  184. .Case("getdelim", TaintPropagationRule(3, 0))
  185. .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
  186. .Default(TaintPropagationRule());
  187. if (!Rule.isNull())
  188. return Rule;
  189. // Check if it's one of the memory setting/copying functions.
  190. // This check is specialized but faster then calling isCLibraryFunction.
  191. unsigned BId = 0;
  192. if ( (BId = FDecl->getMemoryFunctionKind()) )
  193. switch(BId) {
  194. case Builtin::BImemcpy:
  195. case Builtin::BImemmove:
  196. case Builtin::BIstrncpy:
  197. case Builtin::BIstrncat:
  198. return TaintPropagationRule(1, 2, 0, true);
  199. case Builtin::BIstrlcpy:
  200. case Builtin::BIstrlcat:
  201. return TaintPropagationRule(1, 2, 0, false);
  202. case Builtin::BIstrndup:
  203. return TaintPropagationRule(0, 1, ReturnValueIndex);
  204. default:
  205. break;
  206. };
  207. // Process all other functions which could be defined as builtins.
  208. if (Rule.isNull()) {
  209. if (C.isCLibraryFunction(FDecl, "snprintf") ||
  210. C.isCLibraryFunction(FDecl, "sprintf"))
  211. return TaintPropagationRule(InvalidArgIndex, 0, true);
  212. else if (C.isCLibraryFunction(FDecl, "strcpy") ||
  213. C.isCLibraryFunction(FDecl, "stpcpy") ||
  214. C.isCLibraryFunction(FDecl, "strcat"))
  215. return TaintPropagationRule(1, 0, true);
  216. else if (C.isCLibraryFunction(FDecl, "bcopy"))
  217. return TaintPropagationRule(0, 2, 1, false);
  218. else if (C.isCLibraryFunction(FDecl, "strdup") ||
  219. C.isCLibraryFunction(FDecl, "strdupa"))
  220. return TaintPropagationRule(0, ReturnValueIndex);
  221. else if (C.isCLibraryFunction(FDecl, "wcsdup"))
  222. return TaintPropagationRule(0, ReturnValueIndex);
  223. }
  224. // Skipping the following functions, since they might be used for cleansing
  225. // or smart memory copy:
  226. // - memccpy - copying until hitting a special character.
  227. return TaintPropagationRule();
  228. }
  229. void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
  230. CheckerContext &C) const {
  231. // Check for errors first.
  232. if (checkPre(CE, C))
  233. return;
  234. // Add taint second.
  235. addSourcesPre(CE, C);
  236. }
  237. void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
  238. CheckerContext &C) const {
  239. if (propagateFromPre(CE, C))
  240. return;
  241. addSourcesPost(CE, C);
  242. }
  243. void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
  244. CheckerContext &C) const {
  245. ProgramStateRef State = nullptr;
  246. const FunctionDecl *FDecl = C.getCalleeDecl(CE);
  247. if (!FDecl || FDecl->getKind() != Decl::Function)
  248. return;
  249. StringRef Name = C.getCalleeName(FDecl);
  250. if (Name.empty())
  251. return;
  252. // First, try generating a propagation rule for this function.
  253. TaintPropagationRule Rule =
  254. TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
  255. if (!Rule.isNull()) {
  256. State = Rule.process(CE, C);
  257. if (!State)
  258. return;
  259. C.addTransition(State);
  260. return;
  261. }
  262. // Otherwise, check if we have custom pre-processing implemented.
  263. FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
  264. .Case("fscanf", &GenericTaintChecker::preFscanf)
  265. .Default(nullptr);
  266. // Check and evaluate the call.
  267. if (evalFunction)
  268. State = (this->*evalFunction)(CE, C);
  269. if (!State)
  270. return;
  271. C.addTransition(State);
  272. }
  273. bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
  274. CheckerContext &C) const {
  275. ProgramStateRef State = C.getState();
  276. // Depending on what was tainted at pre-visit, we determined a set of
  277. // arguments which should be tainted after the function returns. These are
  278. // stored in the state as TaintArgsOnPostVisit set.
  279. TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
  280. if (TaintArgs.isEmpty())
  281. return false;
  282. for (llvm::ImmutableSet<unsigned>::iterator
  283. I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
  284. unsigned ArgNum = *I;
  285. // Special handling for the tainted return value.
  286. if (ArgNum == ReturnValueIndex) {
  287. State = State->addTaint(CE, C.getLocationContext());
  288. continue;
  289. }
  290. // The arguments are pointer arguments. The data they are pointing at is
  291. // tainted after the call.
  292. if (CE->getNumArgs() < (ArgNum + 1))
  293. return false;
  294. const Expr* Arg = CE->getArg(ArgNum);
  295. SymbolRef Sym = getPointedToSymbol(C, Arg);
  296. if (Sym)
  297. State = State->addTaint(Sym);
  298. }
  299. // Clear up the taint info from the state.
  300. State = State->remove<TaintArgsOnPostVisit>();
  301. if (State != C.getState()) {
  302. C.addTransition(State);
  303. return true;
  304. }
  305. return false;
  306. }
  307. void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
  308. CheckerContext &C) const {
  309. // Define the attack surface.
  310. // Set the evaluation function by switching on the callee name.
  311. const FunctionDecl *FDecl = C.getCalleeDecl(CE);
  312. if (!FDecl || FDecl->getKind() != Decl::Function)
  313. return;
  314. StringRef Name = C.getCalleeName(FDecl);
  315. if (Name.empty())
  316. return;
  317. FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
  318. .Case("scanf", &GenericTaintChecker::postScanf)
  319. // TODO: Add support for vfscanf & family.
  320. .Case("getchar", &GenericTaintChecker::postRetTaint)
  321. .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
  322. .Case("getenv", &GenericTaintChecker::postRetTaint)
  323. .Case("fopen", &GenericTaintChecker::postRetTaint)
  324. .Case("fdopen", &GenericTaintChecker::postRetTaint)
  325. .Case("freopen", &GenericTaintChecker::postRetTaint)
  326. .Case("getch", &GenericTaintChecker::postRetTaint)
  327. .Case("wgetch", &GenericTaintChecker::postRetTaint)
  328. .Case("socket", &GenericTaintChecker::postSocket)
  329. .Default(nullptr);
  330. // If the callee isn't defined, it is not of security concern.
  331. // Check and evaluate the call.
  332. ProgramStateRef State = nullptr;
  333. if (evalFunction)
  334. State = (this->*evalFunction)(CE, C);
  335. if (!State)
  336. return;
  337. C.addTransition(State);
  338. }
  339. bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
  340. if (checkUncontrolledFormatString(CE, C))
  341. return true;
  342. const FunctionDecl *FDecl = C.getCalleeDecl(CE);
  343. if (!FDecl || FDecl->getKind() != Decl::Function)
  344. return false;
  345. StringRef Name = C.getCalleeName(FDecl);
  346. if (Name.empty())
  347. return false;
  348. if (checkSystemCall(CE, Name, C))
  349. return true;
  350. if (checkTaintedBufferSize(CE, FDecl, C))
  351. return true;
  352. return false;
  353. }
  354. SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
  355. const Expr* Arg) {
  356. ProgramStateRef State = C.getState();
  357. SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
  358. if (AddrVal.isUnknownOrUndef())
  359. return nullptr;
  360. Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
  361. if (!AddrLoc)
  362. return nullptr;
  363. const PointerType *ArgTy =
  364. dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
  365. SVal Val = State->getSVal(*AddrLoc,
  366. ArgTy ? ArgTy->getPointeeType(): QualType());
  367. return Val.getAsSymbol();
  368. }
  369. ProgramStateRef
  370. GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
  371. CheckerContext &C) const {
  372. ProgramStateRef State = C.getState();
  373. // Check for taint in arguments.
  374. bool IsTainted = false;
  375. for (ArgVector::const_iterator I = SrcArgs.begin(),
  376. E = SrcArgs.end(); I != E; ++I) {
  377. unsigned ArgNum = *I;
  378. if (ArgNum == InvalidArgIndex) {
  379. // Check if any of the arguments is tainted, but skip the
  380. // destination arguments.
  381. for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
  382. if (isDestinationArgument(i))
  383. continue;
  384. if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
  385. break;
  386. }
  387. break;
  388. }
  389. if (CE->getNumArgs() < (ArgNum + 1))
  390. return State;
  391. if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
  392. break;
  393. }
  394. if (!IsTainted)
  395. return State;
  396. // Mark the arguments which should be tainted after the function returns.
  397. for (ArgVector::const_iterator I = DstArgs.begin(),
  398. E = DstArgs.end(); I != E; ++I) {
  399. unsigned ArgNum = *I;
  400. // Should we mark all arguments as tainted?
  401. if (ArgNum == InvalidArgIndex) {
  402. // For all pointer and references that were passed in:
  403. // If they are not pointing to const data, mark data as tainted.
  404. // TODO: So far we are just going one level down; ideally we'd need to
  405. // recurse here.
  406. for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
  407. const Expr *Arg = CE->getArg(i);
  408. // Process pointer argument.
  409. const Type *ArgTy = Arg->getType().getTypePtr();
  410. QualType PType = ArgTy->getPointeeType();
  411. if ((!PType.isNull() && !PType.isConstQualified())
  412. || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
  413. State = State->add<TaintArgsOnPostVisit>(i);
  414. }
  415. continue;
  416. }
  417. // Should mark the return value?
  418. if (ArgNum == ReturnValueIndex) {
  419. State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
  420. continue;
  421. }
  422. // Mark the given argument.
  423. assert(ArgNum < CE->getNumArgs());
  424. State = State->add<TaintArgsOnPostVisit>(ArgNum);
  425. }
  426. return State;
  427. }
  428. // If argument 0 (file descriptor) is tainted, all arguments except for arg 0
  429. // and arg 1 should get taint.
  430. ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
  431. CheckerContext &C) const {
  432. assert(CE->getNumArgs() >= 2);
  433. ProgramStateRef State = C.getState();
  434. // Check is the file descriptor is tainted.
  435. if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
  436. isStdin(CE->getArg(0), C)) {
  437. // All arguments except for the first two should get taint.
  438. for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
  439. State = State->add<TaintArgsOnPostVisit>(i);
  440. return State;
  441. }
  442. return nullptr;
  443. }
  444. // If argument 0(protocol domain) is network, the return value should get taint.
  445. ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
  446. CheckerContext &C) const {
  447. ProgramStateRef State = C.getState();
  448. if (CE->getNumArgs() < 3)
  449. return State;
  450. SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
  451. StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
  452. // White list the internal communication protocols.
  453. if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
  454. DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
  455. return State;
  456. State = State->addTaint(CE, C.getLocationContext());
  457. return State;
  458. }
  459. ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
  460. CheckerContext &C) const {
  461. ProgramStateRef State = C.getState();
  462. if (CE->getNumArgs() < 2)
  463. return State;
  464. // All arguments except for the very first one should get taint.
  465. for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
  466. // The arguments are pointer arguments. The data they are pointing at is
  467. // tainted after the call.
  468. const Expr* Arg = CE->getArg(i);
  469. SymbolRef Sym = getPointedToSymbol(C, Arg);
  470. if (Sym)
  471. State = State->addTaint(Sym);
  472. }
  473. return State;
  474. }
  475. ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
  476. CheckerContext &C) const {
  477. return C.getState()->addTaint(CE, C.getLocationContext());
  478. }
  479. bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
  480. ProgramStateRef State = C.getState();
  481. SVal Val = State->getSVal(E, C.getLocationContext());
  482. // stdin is a pointer, so it would be a region.
  483. const MemRegion *MemReg = Val.getAsRegion();
  484. // The region should be symbolic, we do not know it's value.
  485. const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
  486. if (!SymReg)
  487. return false;
  488. // Get it's symbol and find the declaration region it's pointing to.
  489. const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
  490. if (!Sm)
  491. return false;
  492. const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
  493. if (!DeclReg)
  494. return false;
  495. // This region corresponds to a declaration, find out if it's a global/extern
  496. // variable named stdin with the proper type.
  497. if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
  498. D = D->getCanonicalDecl();
  499. if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
  500. if (const PointerType * PtrTy =
  501. dyn_cast<PointerType>(D->getType().getTypePtr()))
  502. if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
  503. return true;
  504. }
  505. return false;
  506. }
  507. static bool getPrintfFormatArgumentNum(const CallExpr *CE,
  508. const CheckerContext &C,
  509. unsigned int &ArgNum) {
  510. // Find if the function contains a format string argument.
  511. // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
  512. // vsnprintf, syslog, custom annotated functions.
  513. const FunctionDecl *FDecl = C.getCalleeDecl(CE);
  514. if (!FDecl)
  515. return false;
  516. for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
  517. ArgNum = Format->getFormatIdx() - 1;
  518. if ((Format->getType()->getName() == "printf") &&
  519. CE->getNumArgs() > ArgNum)
  520. return true;
  521. }
  522. // Or if a function is named setproctitle (this is a heuristic).
  523. if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
  524. ArgNum = 0;
  525. return true;
  526. }
  527. return false;
  528. }
  529. bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
  530. const char Msg[],
  531. CheckerContext &C) const {
  532. assert(E);
  533. // Check for taint.
  534. ProgramStateRef State = C.getState();
  535. if (!State->isTainted(getPointedToSymbol(C, E)) &&
  536. !State->isTainted(E, C.getLocationContext()))
  537. return false;
  538. // Generate diagnostic.
  539. if (ExplodedNode *N = C.addTransition()) {
  540. initBugType();
  541. auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
  542. report->addRange(E->getSourceRange());
  543. C.emitReport(std::move(report));
  544. return true;
  545. }
  546. return false;
  547. }
  548. bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
  549. CheckerContext &C) const{
  550. // Check if the function contains a format string argument.
  551. unsigned int ArgNum = 0;
  552. if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
  553. return false;
  554. // If either the format string content or the pointer itself are tainted, warn.
  555. if (generateReportIfTainted(CE->getArg(ArgNum),
  556. MsgUncontrolledFormatString, C))
  557. return true;
  558. return false;
  559. }
  560. bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
  561. StringRef Name,
  562. CheckerContext &C) const {
  563. // TODO: It might make sense to run this check on demand. In some cases,
  564. // we should check if the environment has been cleansed here. We also might
  565. // need to know if the user was reset before these calls(seteuid).
  566. unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
  567. .Case("system", 0)
  568. .Case("popen", 0)
  569. .Case("execl", 0)
  570. .Case("execle", 0)
  571. .Case("execlp", 0)
  572. .Case("execv", 0)
  573. .Case("execvp", 0)
  574. .Case("execvP", 0)
  575. .Case("execve", 0)
  576. .Case("dlopen", 0)
  577. .Default(UINT_MAX);
  578. if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
  579. return false;
  580. if (generateReportIfTainted(CE->getArg(ArgNum),
  581. MsgSanitizeSystemArgs, C))
  582. return true;
  583. return false;
  584. }
  585. // TODO: Should this check be a part of the CString checker?
  586. // If yes, should taint be a global setting?
  587. bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
  588. const FunctionDecl *FDecl,
  589. CheckerContext &C) const {
  590. // If the function has a buffer size argument, set ArgNum.
  591. unsigned ArgNum = InvalidArgIndex;
  592. unsigned BId = 0;
  593. if ( (BId = FDecl->getMemoryFunctionKind()) )
  594. switch(BId) {
  595. case Builtin::BImemcpy:
  596. case Builtin::BImemmove:
  597. case Builtin::BIstrncpy:
  598. ArgNum = 2;
  599. break;
  600. case Builtin::BIstrndup:
  601. ArgNum = 1;
  602. break;
  603. default:
  604. break;
  605. };
  606. if (ArgNum == InvalidArgIndex) {
  607. if (C.isCLibraryFunction(FDecl, "malloc") ||
  608. C.isCLibraryFunction(FDecl, "calloc") ||
  609. C.isCLibraryFunction(FDecl, "alloca"))
  610. ArgNum = 0;
  611. else if (C.isCLibraryFunction(FDecl, "memccpy"))
  612. ArgNum = 3;
  613. else if (C.isCLibraryFunction(FDecl, "realloc"))
  614. ArgNum = 1;
  615. else if (C.isCLibraryFunction(FDecl, "bcopy"))
  616. ArgNum = 2;
  617. }
  618. if (ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
  619. generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C))
  620. return true;
  621. return false;
  622. }
/// Registration entry point: registers GenericTaintChecker with the given
/// checker manager.
void ento::registerGenericTaintChecker(CheckerManager &mgr) {
  mgr.registerChecker<GenericTaintChecker>();
}