FileAnalysis.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533
  1. //===- FileAnalysis.cpp -----------------------------------------*- C++ -*-===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. #include "FileAnalysis.h"
  10. #include "GraphBuilder.h"
  11. #include "llvm/BinaryFormat/ELF.h"
  12. #include "llvm/DebugInfo/DWARF/DWARFContext.h"
  13. #include "llvm/MC/MCAsmInfo.h"
  14. #include "llvm/MC/MCContext.h"
  15. #include "llvm/MC/MCDisassembler/MCDisassembler.h"
  16. #include "llvm/MC/MCInst.h"
  17. #include "llvm/MC/MCInstPrinter.h"
  18. #include "llvm/MC/MCInstrAnalysis.h"
  19. #include "llvm/MC/MCInstrDesc.h"
  20. #include "llvm/MC/MCInstrInfo.h"
  21. #include "llvm/MC/MCObjectFileInfo.h"
  22. #include "llvm/MC/MCRegisterInfo.h"
  23. #include "llvm/MC/MCSubtargetInfo.h"
  24. #include "llvm/Object/Binary.h"
  25. #include "llvm/Object/COFF.h"
  26. #include "llvm/Object/ELFObjectFile.h"
  27. #include "llvm/Object/ObjectFile.h"
  28. #include "llvm/Support/Casting.h"
  29. #include "llvm/Support/CommandLine.h"
  30. #include "llvm/Support/Error.h"
  31. #include "llvm/Support/MemoryBuffer.h"
  32. #include "llvm/Support/TargetRegistry.h"
  33. #include "llvm/Support/TargetSelect.h"
  34. #include "llvm/Support/raw_ostream.h"
  35. using Instr = llvm::cfi_verify::FileAnalysis::Instr;
  36. using LLVMSymbolizer = llvm::symbolize::LLVMSymbolizer;
  37. namespace llvm {
  38. namespace cfi_verify {
  39. bool IgnoreDWARFFlag;
  40. static cl::opt<bool, true> IgnoreDWARFArg(
  41. "ignore-dwarf",
  42. cl::desc(
  43. "Ignore all DWARF data. This relaxes the requirements for all "
  44. "statically linked libraries to have been compiled with '-g', but "
  45. "will result in false positives for 'CFI unprotected' instructions."),
  46. cl::location(IgnoreDWARFFlag), cl::init(false));
  47. StringRef stringCFIProtectionStatus(CFIProtectionStatus Status) {
  48. switch (Status) {
  49. case CFIProtectionStatus::PROTECTED:
  50. return "PROTECTED";
  51. case CFIProtectionStatus::FAIL_NOT_INDIRECT_CF:
  52. return "FAIL_NOT_INDIRECT_CF";
  53. case CFIProtectionStatus::FAIL_ORPHANS:
  54. return "FAIL_ORPHANS";
  55. case CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH:
  56. return "FAIL_BAD_CONDITIONAL_BRANCH";
  57. case CFIProtectionStatus::FAIL_REGISTER_CLOBBERED:
  58. return "FAIL_REGISTER_CLOBBERED";
  59. case CFIProtectionStatus::FAIL_INVALID_INSTRUCTION:
  60. return "FAIL_INVALID_INSTRUCTION";
  61. }
  62. llvm_unreachable("Attempted to stringify an unknown enum value.");
  63. }
  64. Expected<FileAnalysis> FileAnalysis::Create(StringRef Filename) {
  65. // Open the filename provided.
  66. Expected<object::OwningBinary<object::Binary>> BinaryOrErr =
  67. object::createBinary(Filename);
  68. if (!BinaryOrErr)
  69. return BinaryOrErr.takeError();
  70. // Construct the object and allow it to take ownership of the binary.
  71. object::OwningBinary<object::Binary> Binary = std::move(BinaryOrErr.get());
  72. FileAnalysis Analysis(std::move(Binary));
  73. Analysis.Object = dyn_cast<object::ObjectFile>(Analysis.Binary.getBinary());
  74. if (!Analysis.Object)
  75. return make_error<UnsupportedDisassembly>("Failed to cast object");
  76. switch (Analysis.Object->getArch()) {
  77. case Triple::x86:
  78. case Triple::x86_64:
  79. case Triple::aarch64:
  80. case Triple::aarch64_be:
  81. break;
  82. default:
  83. return make_error<UnsupportedDisassembly>("Unsupported architecture.");
  84. }
  85. Analysis.ObjectTriple = Analysis.Object->makeTriple();
  86. Analysis.Features = Analysis.Object->getFeatures();
  87. // Init the rest of the object.
  88. if (auto InitResponse = Analysis.initialiseDisassemblyMembers())
  89. return std::move(InitResponse);
  90. if (auto SectionParseResponse = Analysis.parseCodeSections())
  91. return std::move(SectionParseResponse);
  92. return std::move(Analysis);
  93. }
  94. FileAnalysis::FileAnalysis(object::OwningBinary<object::Binary> Binary)
  95. : Binary(std::move(Binary)) {}
  96. FileAnalysis::FileAnalysis(const Triple &ObjectTriple,
  97. const SubtargetFeatures &Features)
  98. : ObjectTriple(ObjectTriple), Features(Features) {}
  99. const Instr *
  100. FileAnalysis::getPrevInstructionSequential(const Instr &InstrMeta) const {
  101. std::map<uint64_t, Instr>::const_iterator KV =
  102. Instructions.find(InstrMeta.VMAddress);
  103. if (KV == Instructions.end() || KV == Instructions.begin())
  104. return nullptr;
  105. if (!(--KV)->second.Valid)
  106. return nullptr;
  107. return &KV->second;
  108. }
  109. const Instr *
  110. FileAnalysis::getNextInstructionSequential(const Instr &InstrMeta) const {
  111. std::map<uint64_t, Instr>::const_iterator KV =
  112. Instructions.find(InstrMeta.VMAddress);
  113. if (KV == Instructions.end() || ++KV == Instructions.end())
  114. return nullptr;
  115. if (!KV->second.Valid)
  116. return nullptr;
  117. return &KV->second;
  118. }
  119. bool FileAnalysis::usesRegisterOperand(const Instr &InstrMeta) const {
  120. for (const auto &Operand : InstrMeta.Instruction) {
  121. if (Operand.isReg())
  122. return true;
  123. }
  124. return false;
  125. }
  126. const Instr *FileAnalysis::getInstruction(uint64_t Address) const {
  127. const auto &InstrKV = Instructions.find(Address);
  128. if (InstrKV == Instructions.end())
  129. return nullptr;
  130. return &InstrKV->second;
  131. }
  132. const Instr &FileAnalysis::getInstructionOrDie(uint64_t Address) const {
  133. const auto &InstrKV = Instructions.find(Address);
  134. assert(InstrKV != Instructions.end() && "Address doesn't exist.");
  135. return InstrKV->second;
  136. }
  137. bool FileAnalysis::isCFITrap(const Instr &InstrMeta) const {
  138. const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
  139. return InstrDesc.isTrap();
  140. }
  141. bool FileAnalysis::canFallThrough(const Instr &InstrMeta) const {
  142. if (!InstrMeta.Valid)
  143. return false;
  144. if (isCFITrap(InstrMeta))
  145. return false;
  146. const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
  147. if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo))
  148. return InstrDesc.isConditionalBranch();
  149. return true;
  150. }
  151. const Instr *
  152. FileAnalysis::getDefiniteNextInstruction(const Instr &InstrMeta) const {
  153. if (!InstrMeta.Valid)
  154. return nullptr;
  155. if (isCFITrap(InstrMeta))
  156. return nullptr;
  157. const auto &InstrDesc = MII->get(InstrMeta.Instruction.getOpcode());
  158. const Instr *NextMetaPtr;
  159. if (InstrDesc.mayAffectControlFlow(InstrMeta.Instruction, *RegisterInfo)) {
  160. if (InstrDesc.isConditionalBranch())
  161. return nullptr;
  162. uint64_t Target;
  163. if (!MIA->evaluateBranch(InstrMeta.Instruction, InstrMeta.VMAddress,
  164. InstrMeta.InstructionSize, Target))
  165. return nullptr;
  166. NextMetaPtr = getInstruction(Target);
  167. } else {
  168. NextMetaPtr =
  169. getInstruction(InstrMeta.VMAddress + InstrMeta.InstructionSize);
  170. }
  171. if (!NextMetaPtr || !NextMetaPtr->Valid)
  172. return nullptr;
  173. return NextMetaPtr;
  174. }
  175. std::set<const Instr *>
  176. FileAnalysis::getDirectControlFlowXRefs(const Instr &InstrMeta) const {
  177. std::set<const Instr *> CFCrossReferences;
  178. const Instr *PrevInstruction = getPrevInstructionSequential(InstrMeta);
  179. if (PrevInstruction && canFallThrough(*PrevInstruction))
  180. CFCrossReferences.insert(PrevInstruction);
  181. const auto &TargetRefsKV = StaticBranchTargetings.find(InstrMeta.VMAddress);
  182. if (TargetRefsKV == StaticBranchTargetings.end())
  183. return CFCrossReferences;
  184. for (uint64_t SourceInstrAddress : TargetRefsKV->second) {
  185. const auto &SourceInstrKV = Instructions.find(SourceInstrAddress);
  186. if (SourceInstrKV == Instructions.end()) {
  187. errs() << "Failed to find source instruction at address "
  188. << format_hex(SourceInstrAddress, 2)
  189. << " for the cross-reference to instruction at address "
  190. << format_hex(InstrMeta.VMAddress, 2) << ".\n";
  191. continue;
  192. }
  193. CFCrossReferences.insert(&SourceInstrKV->second);
  194. }
  195. return CFCrossReferences;
  196. }
  197. const std::set<uint64_t> &FileAnalysis::getIndirectInstructions() const {
  198. return IndirectInstructions;
  199. }
  200. const MCRegisterInfo *FileAnalysis::getRegisterInfo() const {
  201. return RegisterInfo.get();
  202. }
  203. const MCInstrInfo *FileAnalysis::getMCInstrInfo() const { return MII.get(); }
  204. const MCInstrAnalysis *FileAnalysis::getMCInstrAnalysis() const {
  205. return MIA.get();
  206. }
  207. Expected<DIInliningInfo> FileAnalysis::symbolizeInlinedCode(uint64_t Address) {
  208. assert(Symbolizer != nullptr && "Symbolizer is invalid.");
  209. return Symbolizer->symbolizeInlinedCode(Object->getFileName(), Address);
  210. }
  211. CFIProtectionStatus
  212. FileAnalysis::validateCFIProtection(const GraphResult &Graph) const {
  213. const Instr *InstrMetaPtr = getInstruction(Graph.BaseAddress);
  214. if (!InstrMetaPtr)
  215. return CFIProtectionStatus::FAIL_INVALID_INSTRUCTION;
  216. const auto &InstrDesc = MII->get(InstrMetaPtr->Instruction.getOpcode());
  217. if (!InstrDesc.mayAffectControlFlow(InstrMetaPtr->Instruction, *RegisterInfo))
  218. return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF;
  219. if (!usesRegisterOperand(*InstrMetaPtr))
  220. return CFIProtectionStatus::FAIL_NOT_INDIRECT_CF;
  221. if (!Graph.OrphanedNodes.empty())
  222. return CFIProtectionStatus::FAIL_ORPHANS;
  223. for (const auto &BranchNode : Graph.ConditionalBranchNodes) {
  224. if (!BranchNode.CFIProtection)
  225. return CFIProtectionStatus::FAIL_BAD_CONDITIONAL_BRANCH;
  226. }
  227. if (indirectCFOperandClobber(Graph) != Graph.BaseAddress)
  228. return CFIProtectionStatus::FAIL_REGISTER_CLOBBERED;
  229. return CFIProtectionStatus::PROTECTED;
  230. }
  231. uint64_t FileAnalysis::indirectCFOperandClobber(const GraphResult &Graph) const {
  232. assert(Graph.OrphanedNodes.empty() && "Orphaned nodes should be empty.");
  233. // Get the set of registers we must check to ensure they're not clobbered.
  234. const Instr &IndirectCF = getInstructionOrDie(Graph.BaseAddress);
  235. DenseSet<unsigned> RegisterNumbers;
  236. for (const auto &Operand : IndirectCF.Instruction) {
  237. if (Operand.isReg())
  238. RegisterNumbers.insert(Operand.getReg());
  239. }
  240. assert(RegisterNumbers.size() && "Zero register operands on indirect CF.");
  241. // Now check all branches to indirect CFs and ensure no clobbering happens.
  242. for (const auto &Branch : Graph.ConditionalBranchNodes) {
  243. uint64_t Node;
  244. if (Branch.IndirectCFIsOnTargetPath)
  245. Node = Branch.Target;
  246. else
  247. Node = Branch.Fallthrough;
  248. // Some architectures (e.g., AArch64) cannot load in an indirect branch, so
  249. // we allow them one load.
  250. bool canLoad = !MII->get(IndirectCF.Instruction.getOpcode()).mayLoad();
  251. // We walk backwards from the indirect CF. It is the last node returned by
  252. // Graph.flattenAddress, so we skip it since we already handled it.
  253. DenseSet<unsigned> CurRegisterNumbers = RegisterNumbers;
  254. std::vector<uint64_t> Nodes = Graph.flattenAddress(Node);
  255. for (auto I = Nodes.rbegin() + 1, E = Nodes.rend(); I != E; ++I) {
  256. Node = *I;
  257. const Instr &NodeInstr = getInstructionOrDie(Node);
  258. const auto &InstrDesc = MII->get(NodeInstr.Instruction.getOpcode());
  259. for (auto RI = CurRegisterNumbers.begin(), RE = CurRegisterNumbers.end();
  260. RI != RE; ++RI) {
  261. unsigned RegNum = *RI;
  262. if (InstrDesc.hasDefOfPhysReg(NodeInstr.Instruction, RegNum,
  263. *RegisterInfo)) {
  264. if (!canLoad || !InstrDesc.mayLoad())
  265. return Node;
  266. canLoad = false;
  267. CurRegisterNumbers.erase(RI);
  268. // Add the registers this load reads to those we check for clobbers.
  269. for (unsigned i = InstrDesc.getNumDefs(),
  270. e = InstrDesc.getNumOperands(); i != e; i++) {
  271. const auto Operand = NodeInstr.Instruction.getOperand(i);
  272. if (Operand.isReg())
  273. CurRegisterNumbers.insert(Operand.getReg());
  274. }
  275. break;
  276. }
  277. }
  278. }
  279. }
  280. return Graph.BaseAddress;
  281. }
  282. void FileAnalysis::printInstruction(const Instr &InstrMeta,
  283. raw_ostream &OS) const {
  284. Printer->printInst(&InstrMeta.Instruction, OS, "", *SubtargetInfo.get());
  285. }
  286. Error FileAnalysis::initialiseDisassemblyMembers() {
  287. std::string TripleName = ObjectTriple.getTriple();
  288. ArchName = "";
  289. MCPU = "";
  290. std::string ErrorString;
  291. Symbolizer.reset(new LLVMSymbolizer());
  292. ObjectTarget =
  293. TargetRegistry::lookupTarget(ArchName, ObjectTriple, ErrorString);
  294. if (!ObjectTarget)
  295. return make_error<UnsupportedDisassembly>(
  296. (Twine("Couldn't find target \"") + ObjectTriple.getTriple() +
  297. "\", failed with error: " + ErrorString)
  298. .str());
  299. RegisterInfo.reset(ObjectTarget->createMCRegInfo(TripleName));
  300. if (!RegisterInfo)
  301. return make_error<UnsupportedDisassembly>(
  302. "Failed to initialise RegisterInfo.");
  303. AsmInfo.reset(ObjectTarget->createMCAsmInfo(*RegisterInfo, TripleName));
  304. if (!AsmInfo)
  305. return make_error<UnsupportedDisassembly>("Failed to initialise AsmInfo.");
  306. SubtargetInfo.reset(ObjectTarget->createMCSubtargetInfo(
  307. TripleName, MCPU, Features.getString()));
  308. if (!SubtargetInfo)
  309. return make_error<UnsupportedDisassembly>(
  310. "Failed to initialise SubtargetInfo.");
  311. MII.reset(ObjectTarget->createMCInstrInfo());
  312. if (!MII)
  313. return make_error<UnsupportedDisassembly>("Failed to initialise MII.");
  314. Context.reset(new MCContext(AsmInfo.get(), RegisterInfo.get(), &MOFI));
  315. Disassembler.reset(
  316. ObjectTarget->createMCDisassembler(*SubtargetInfo, *Context));
  317. if (!Disassembler)
  318. return make_error<UnsupportedDisassembly>(
  319. "No disassembler available for target");
  320. MIA.reset(ObjectTarget->createMCInstrAnalysis(MII.get()));
  321. Printer.reset(ObjectTarget->createMCInstPrinter(
  322. ObjectTriple, AsmInfo->getAssemblerDialect(), *AsmInfo, *MII,
  323. *RegisterInfo));
  324. return Error::success();
  325. }
  326. Error FileAnalysis::parseCodeSections() {
  327. if (!IgnoreDWARFFlag) {
  328. std::unique_ptr<DWARFContext> DWARF = DWARFContext::create(*Object);
  329. if (!DWARF)
  330. return make_error<StringError>("Could not create DWARF information.",
  331. inconvertibleErrorCode());
  332. bool LineInfoValid = false;
  333. for (auto &Unit : DWARF->compile_units()) {
  334. const auto &LineTable = DWARF->getLineTableForUnit(Unit.get());
  335. if (LineTable && !LineTable->Rows.empty()) {
  336. LineInfoValid = true;
  337. break;
  338. }
  339. }
  340. if (!LineInfoValid)
  341. return make_error<StringError>(
  342. "DWARF line information missing. Did you compile with '-g'?",
  343. inconvertibleErrorCode());
  344. }
  345. for (const object::SectionRef &Section : Object->sections()) {
  346. // Ensure only executable sections get analysed.
  347. if (!(object::ELFSectionRef(Section).getFlags() & ELF::SHF_EXECINSTR))
  348. continue;
  349. StringRef SectionContents;
  350. if (Section.getContents(SectionContents))
  351. return make_error<StringError>("Failed to retrieve section contents",
  352. inconvertibleErrorCode());
  353. ArrayRef<uint8_t> SectionBytes((const uint8_t *)SectionContents.data(),
  354. Section.getSize());
  355. parseSectionContents(SectionBytes, Section.getAddress());
  356. }
  357. return Error::success();
  358. }
  359. void FileAnalysis::parseSectionContents(ArrayRef<uint8_t> SectionBytes,
  360. uint64_t SectionAddress) {
  361. assert(Symbolizer && "Symbolizer is uninitialised.");
  362. MCInst Instruction;
  363. Instr InstrMeta;
  364. uint64_t InstructionSize;
  365. for (uint64_t Byte = 0; Byte < SectionBytes.size();) {
  366. bool ValidInstruction =
  367. Disassembler->getInstruction(Instruction, InstructionSize,
  368. SectionBytes.drop_front(Byte), 0, nulls(),
  369. outs()) == MCDisassembler::Success;
  370. Byte += InstructionSize;
  371. uint64_t VMAddress = SectionAddress + Byte - InstructionSize;
  372. InstrMeta.Instruction = Instruction;
  373. InstrMeta.VMAddress = VMAddress;
  374. InstrMeta.InstructionSize = InstructionSize;
  375. InstrMeta.Valid = ValidInstruction;
  376. addInstruction(InstrMeta);
  377. if (!ValidInstruction)
  378. continue;
  379. // Skip additional parsing for instructions that do not affect the control
  380. // flow.
  381. const auto &InstrDesc = MII->get(Instruction.getOpcode());
  382. if (!InstrDesc.mayAffectControlFlow(Instruction, *RegisterInfo))
  383. continue;
  384. uint64_t Target;
  385. if (MIA->evaluateBranch(Instruction, VMAddress, InstructionSize, Target)) {
  386. // If the target can be evaluated, it's not indirect.
  387. StaticBranchTargetings[Target].push_back(VMAddress);
  388. continue;
  389. }
  390. if (!usesRegisterOperand(InstrMeta))
  391. continue;
  392. if (InstrDesc.isReturn())
  393. continue;
  394. // Check if this instruction exists in the range of the DWARF metadata.
  395. if (!IgnoreDWARFFlag) {
  396. auto LineInfo =
  397. Symbolizer->symbolizeCode(Object->getFileName(), VMAddress);
  398. if (!LineInfo) {
  399. handleAllErrors(LineInfo.takeError(), [](const ErrorInfoBase &E) {
  400. errs() << "Symbolizer failed to get line: " << E.message() << "\n";
  401. });
  402. continue;
  403. }
  404. if (LineInfo->FileName == "<invalid>")
  405. continue;
  406. }
  407. IndirectInstructions.insert(VMAddress);
  408. }
  409. }
  410. void FileAnalysis::addInstruction(const Instr &Instruction) {
  411. const auto &KV =
  412. Instructions.insert(std::make_pair(Instruction.VMAddress, Instruction));
  413. if (!KV.second) {
  414. errs() << "Failed to add instruction at address "
  415. << format_hex(Instruction.VMAddress, 2)
  416. << ": Instruction at this address already exists.\n";
  417. exit(EXIT_FAILURE);
  418. }
  419. }
  420. UnsupportedDisassembly::UnsupportedDisassembly(StringRef Text) : Text(Text) {}
  421. char UnsupportedDisassembly::ID;
  422. void UnsupportedDisassembly::log(raw_ostream &OS) const {
  423. OS << "Could not initialise disassembler: " << Text;
  424. }
  425. std::error_code UnsupportedDisassembly::convertToErrorCode() const {
  426. return std::error_code();
  427. }
  428. } // namespace cfi_verify
  429. } // namespace llvm