SarifDiagnostics.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356
  1. //===--- SarifDiagnostics.cpp - Sarif Diagnostics for Paths -----*- C++ -*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file defines the SarifDiagnostics object.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "clang/Analysis/PathDiagnostic.h"
  13. #include "clang/Basic/Version.h"
  14. #include "clang/Lex/Preprocessor.h"
  15. #include "clang/StaticAnalyzer/Core/AnalyzerOptions.h"
  16. #include "clang/StaticAnalyzer/Core/PathDiagnosticConsumers.h"
  17. #include "llvm/ADT/STLExtras.h"
  18. #include "llvm/ADT/StringMap.h"
  19. #include "llvm/Support/JSON.h"
  20. #include "llvm/Support/Path.h"
  21. using namespace llvm;
  22. using namespace clang;
  23. using namespace ento;
  24. namespace {
  25. class SarifDiagnostics : public PathDiagnosticConsumer {
  26. std::string OutputFile;
  27. public:
  28. SarifDiagnostics(AnalyzerOptions &, const std::string &Output)
  29. : OutputFile(Output) {}
  30. ~SarifDiagnostics() override = default;
  31. void FlushDiagnosticsImpl(std::vector<const PathDiagnostic *> &Diags,
  32. FilesMade *FM) override;
  33. StringRef getName() const override { return "SarifDiagnostics"; }
  34. PathGenerationScheme getGenerationScheme() const override { return Minimal; }
  35. bool supportsLogicalOpControlFlow() const override { return true; }
  36. bool supportsCrossFileDiagnostics() const override { return true; }
  37. };
  38. } // end anonymous namespace
  39. void ento::createSarifDiagnosticConsumer(
  40. AnalyzerOptions &AnalyzerOpts, PathDiagnosticConsumers &C,
  41. const std::string &Output, const Preprocessor &,
  42. const cross_tu::CrossTranslationUnitContext &) {
  43. C.push_back(new SarifDiagnostics(AnalyzerOpts, Output));
  44. }
  45. static StringRef getFileName(const FileEntry &FE) {
  46. StringRef Filename = FE.tryGetRealPathName();
  47. if (Filename.empty())
  48. Filename = FE.getName();
  49. return Filename;
  50. }
  51. static std::string percentEncodeURICharacter(char C) {
  52. // RFC 3986 claims alpha, numeric, and this handful of
  53. // characters are not reserved for the path component and
  54. // should be written out directly. Otherwise, percent
  55. // encode the character and write that out instead of the
  56. // reserved character.
  57. if (llvm::isAlnum(C) ||
  58. StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C))
  59. return std::string(&C, 1);
  60. return "%" + llvm::toHex(StringRef(&C, 1));
  61. }
  62. static std::string fileNameToURI(StringRef Filename) {
  63. llvm::SmallString<32> Ret = StringRef("file://");
  64. // Get the root name to see if it has a URI authority.
  65. StringRef Root = sys::path::root_name(Filename);
  66. if (Root.startswith("//")) {
  67. // There is an authority, so add it to the URI.
  68. Ret += Root.drop_front(2).str();
  69. } else if (!Root.empty()) {
  70. // There is no authority, so end the component and add the root to the URI.
  71. Ret += Twine("/" + Root).str();
  72. }
  73. auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename);
  74. assert(Iter != End && "Expected there to be a non-root path component.");
  75. // Add the rest of the path components, encoding any reserved characters;
  76. // we skip past the first path component, as it was handled it above.
  77. std::for_each(++Iter, End, [&Ret](StringRef Component) {
  78. // For reasons unknown to me, we may get a backslash with Windows native
  79. // paths for the initial backslash following the drive component, which
  80. // we need to ignore as a URI path part.
  81. if (Component == "\\")
  82. return;
  83. // Add the separator between the previous path part and the one being
  84. // currently processed.
  85. Ret += "/";
  86. // URI encode the part.
  87. for (char C : Component) {
  88. Ret += percentEncodeURICharacter(C);
  89. }
  90. });
  91. return Ret.str().str();
  92. }
  93. static json::Object createArtifactLocation(const FileEntry &FE) {
  94. return json::Object{{"uri", fileNameToURI(getFileName(FE))}};
  95. }
  96. static json::Object createArtifact(const FileEntry &FE) {
  97. return json::Object{{"location", createArtifactLocation(FE)},
  98. {"roles", json::Array{"resultFile"}},
  99. {"length", FE.getSize()},
  100. {"mimeType", "text/plain"}};
  101. }
  102. static json::Object createArtifactLocation(const FileEntry &FE,
  103. json::Array &Artifacts) {
  104. std::string FileURI = fileNameToURI(getFileName(FE));
  105. // See if the Artifacts array contains this URI already. If it does not,
  106. // create a new artifact object to add to the array.
  107. auto I = llvm::find_if(Artifacts, [&](const json::Value &File) {
  108. if (const json::Object *Obj = File.getAsObject()) {
  109. if (const json::Object *FileLoc = Obj->getObject("location")) {
  110. Optional<StringRef> URI = FileLoc->getString("uri");
  111. return URI && URI->equals(FileURI);
  112. }
  113. }
  114. return false;
  115. });
  116. // Calculate the index within the artifact array so it can be stored in
  117. // the JSON object.
  118. auto Index = static_cast<unsigned>(std::distance(Artifacts.begin(), I));
  119. if (I == Artifacts.end())
  120. Artifacts.push_back(createArtifact(FE));
  121. return json::Object{{"uri", FileURI}, {"index", Index}};
  122. }
  123. static json::Object createTextRegion(SourceRange R, const SourceManager &SM) {
  124. json::Object Region{
  125. {"startLine", SM.getExpansionLineNumber(R.getBegin())},
  126. {"startColumn", SM.getExpansionColumnNumber(R.getBegin())},
  127. };
  128. if (R.getBegin() == R.getEnd()) {
  129. Region["endColumn"] = SM.getExpansionColumnNumber(R.getBegin());
  130. } else {
  131. Region["endLine"] = SM.getExpansionLineNumber(R.getEnd());
  132. Region["endColumn"] = SM.getExpansionColumnNumber(R.getEnd()) + 1;
  133. }
  134. return Region;
  135. }
  136. static json::Object createPhysicalLocation(SourceRange R, const FileEntry &FE,
  137. const SourceManager &SMgr,
  138. json::Array &Artifacts) {
  139. return json::Object{
  140. {{"artifactLocation", createArtifactLocation(FE, Artifacts)},
  141. {"region", createTextRegion(R, SMgr)}}};
  142. }
  143. enum class Importance { Important, Essential, Unimportant };
  144. static StringRef importanceToStr(Importance I) {
  145. switch (I) {
  146. case Importance::Important:
  147. return "important";
  148. case Importance::Essential:
  149. return "essential";
  150. case Importance::Unimportant:
  151. return "unimportant";
  152. }
  153. llvm_unreachable("Fully covered switch is not so fully covered");
  154. }
  155. static json::Object createThreadFlowLocation(json::Object &&Location,
  156. Importance I) {
  157. return json::Object{{"location", std::move(Location)},
  158. {"importance", importanceToStr(I)}};
  159. }
  160. static json::Object createMessage(StringRef Text) {
  161. return json::Object{{"text", Text.str()}};
  162. }
  163. static json::Object createLocation(json::Object &&PhysicalLocation,
  164. StringRef Message = "") {
  165. json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}};
  166. if (!Message.empty())
  167. Ret.insert({"message", createMessage(Message)});
  168. return Ret;
  169. }
  170. static Importance calculateImportance(const PathDiagnosticPiece &Piece) {
  171. switch (Piece.getKind()) {
  172. case PathDiagnosticPiece::Call:
  173. case PathDiagnosticPiece::Macro:
  174. case PathDiagnosticPiece::Note:
  175. case PathDiagnosticPiece::PopUp:
  176. // FIXME: What should be reported here?
  177. break;
  178. case PathDiagnosticPiece::Event:
  179. return Piece.getTagStr() == "ConditionBRVisitor" ? Importance::Important
  180. : Importance::Essential;
  181. case PathDiagnosticPiece::ControlFlow:
  182. return Importance::Unimportant;
  183. }
  184. return Importance::Unimportant;
  185. }
  186. static json::Object createThreadFlow(const PathPieces &Pieces,
  187. json::Array &Artifacts) {
  188. const SourceManager &SMgr = Pieces.front()->getLocation().getManager();
  189. json::Array Locations;
  190. for (const auto &Piece : Pieces) {
  191. const PathDiagnosticLocation &P = Piece->getLocation();
  192. Locations.push_back(createThreadFlowLocation(
  193. createLocation(createPhysicalLocation(
  194. P.asRange(),
  195. *P.asLocation().getExpansionLoc().getFileEntry(),
  196. SMgr, Artifacts),
  197. Piece->getString()),
  198. calculateImportance(*Piece)));
  199. }
  200. return json::Object{{"locations", std::move(Locations)}};
  201. }
  202. static json::Object createCodeFlow(const PathPieces &Pieces,
  203. json::Array &Artifacts) {
  204. return json::Object{
  205. {"threadFlows", json::Array{createThreadFlow(Pieces, Artifacts)}}};
  206. }
  207. static json::Object createResult(const PathDiagnostic &Diag,
  208. json::Array &Artifacts,
  209. const StringMap<unsigned> &RuleMapping) {
  210. const PathPieces &Path = Diag.path.flatten(false);
  211. const SourceManager &SMgr = Path.front()->getLocation().getManager();
  212. auto Iter = RuleMapping.find(Diag.getCheckerName());
  213. assert(Iter != RuleMapping.end() && "Rule ID is not in the array index map?");
  214. return json::Object{
  215. {"message", createMessage(Diag.getVerboseDescription())},
  216. {"codeFlows", json::Array{createCodeFlow(Path, Artifacts)}},
  217. {"locations",
  218. json::Array{createLocation(createPhysicalLocation(
  219. Diag.getLocation().asRange(),
  220. *Diag.getLocation().asLocation().getExpansionLoc().getFileEntry(),
  221. SMgr, Artifacts))}},
  222. {"ruleIndex", Iter->getValue()},
  223. {"ruleId", Diag.getCheckerName()}};
  224. }
  225. static StringRef getRuleDescription(StringRef CheckName) {
  226. return llvm::StringSwitch<StringRef>(CheckName)
  227. #define GET_CHECKERS
  228. #define CHECKER(FULLNAME, CLASS, HELPTEXT, DOC_URI, IS_HIDDEN) \
  229. .Case(FULLNAME, HELPTEXT)
  230. #include "clang/StaticAnalyzer/Checkers/Checkers.inc"
  231. #undef CHECKER
  232. #undef GET_CHECKERS
  233. ;
  234. }
  235. static StringRef getRuleHelpURIStr(StringRef CheckName) {
  236. return llvm::StringSwitch<StringRef>(CheckName)
  237. #define GET_CHECKERS
  238. #define CHECKER(FULLNAME, CLASS, HELPTEXT, DOC_URI, IS_HIDDEN) \
  239. .Case(FULLNAME, DOC_URI)
  240. #include "clang/StaticAnalyzer/Checkers/Checkers.inc"
  241. #undef CHECKER
  242. #undef GET_CHECKERS
  243. ;
  244. }
  245. static json::Object createRule(const PathDiagnostic &Diag) {
  246. StringRef CheckName = Diag.getCheckerName();
  247. json::Object Ret{
  248. {"fullDescription", createMessage(getRuleDescription(CheckName))},
  249. {"name", CheckName},
  250. {"id", CheckName}};
  251. std::string RuleURI = getRuleHelpURIStr(CheckName);
  252. if (!RuleURI.empty())
  253. Ret["helpUri"] = RuleURI;
  254. return Ret;
  255. }
  256. static json::Array createRules(std::vector<const PathDiagnostic *> &Diags,
  257. StringMap<unsigned> &RuleMapping) {
  258. json::Array Rules;
  259. llvm::StringSet<> Seen;
  260. llvm::for_each(Diags, [&](const PathDiagnostic *D) {
  261. StringRef RuleID = D->getCheckerName();
  262. std::pair<llvm::StringSet<>::iterator, bool> P = Seen.insert(RuleID);
  263. if (P.second) {
  264. RuleMapping[RuleID] = Rules.size(); // Maps RuleID to an Array Index.
  265. Rules.push_back(createRule(*D));
  266. }
  267. });
  268. return Rules;
  269. }
  270. static json::Object createTool(std::vector<const PathDiagnostic *> &Diags,
  271. StringMap<unsigned> &RuleMapping) {
  272. return json::Object{
  273. {"driver", json::Object{{"name", "clang"},
  274. {"fullName", "clang static analyzer"},
  275. {"language", "en-US"},
  276. {"version", getClangFullVersion()},
  277. {"rules", createRules(Diags, RuleMapping)}}}};
  278. }
  279. static json::Object createRun(std::vector<const PathDiagnostic *> &Diags) {
  280. json::Array Results, Artifacts;
  281. StringMap<unsigned> RuleMapping;
  282. json::Object Tool = createTool(Diags, RuleMapping);
  283. llvm::for_each(Diags, [&](const PathDiagnostic *D) {
  284. Results.push_back(createResult(*D, Artifacts, RuleMapping));
  285. });
  286. return json::Object{{"tool", std::move(Tool)},
  287. {"results", std::move(Results)},
  288. {"artifacts", std::move(Artifacts)}};
  289. }
  290. void SarifDiagnostics::FlushDiagnosticsImpl(
  291. std::vector<const PathDiagnostic *> &Diags, FilesMade *) {
  292. // We currently overwrite the file if it already exists. However, it may be
  293. // useful to add a feature someday that allows the user to append a run to an
  294. // existing SARIF file. One danger from that approach is that the size of the
  295. // file can become large very quickly, so decoding into JSON to append a run
  296. // may be an expensive operation.
  297. std::error_code EC;
  298. llvm::raw_fd_ostream OS(OutputFile, EC, llvm::sys::fs::OF_Text);
  299. if (EC) {
  300. llvm::errs() << "warning: could not create file: " << EC.message() << '\n';
  301. return;
  302. }
  303. json::Object Sarif{
  304. {"$schema",
  305. "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json"},
  306. {"version", "2.1.0"},
  307. {"runs", json::Array{createRun(Diags)}}};
  308. OS << llvm::formatv("{0:2}\n", json::Value(std::move(Sarif)));
  309. }