InputFile.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508
  1. //===- InputFile.cpp ------------------------------------------ *- C++ --*-===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "InputFile.h"
  9. #include "FormatUtil.h"
  10. #include "LinePrinter.h"
  11. #include "llvm/BinaryFormat/Magic.h"
  12. #include "llvm/DebugInfo/CodeView/CodeView.h"
  13. #include "llvm/DebugInfo/CodeView/LazyRandomTypeCollection.h"
  14. #include "llvm/DebugInfo/CodeView/StringsAndChecksums.h"
  15. #include "llvm/DebugInfo/PDB/Native/DbiStream.h"
  16. #include "llvm/DebugInfo/PDB/Native/NativeSession.h"
  17. #include "llvm/DebugInfo/PDB/Native/PDBFile.h"
  18. #include "llvm/DebugInfo/PDB/Native/PDBStringTable.h"
  19. #include "llvm/DebugInfo/PDB/Native/RawError.h"
  20. #include "llvm/DebugInfo/PDB/Native/TpiStream.h"
  21. #include "llvm/DebugInfo/PDB/PDB.h"
  22. #include "llvm/Object/COFF.h"
  23. #include "llvm/Support/FileSystem.h"
  24. #include "llvm/Support/FormatVariadic.h"
  25. using namespace llvm;
  26. using namespace llvm::codeview;
  27. using namespace llvm::object;
  28. using namespace llvm::pdb;
  29. InputFile::InputFile() {}
  30. InputFile::~InputFile() {}
  31. static Expected<ModuleDebugStreamRef>
  32. getModuleDebugStream(PDBFile &File, StringRef &ModuleName, uint32_t Index) {
  33. ExitOnError Err("Unexpected error: ");
  34. auto &Dbi = Err(File.getPDBDbiStream());
  35. const auto &Modules = Dbi.modules();
  36. if (Index >= Modules.getModuleCount())
  37. return make_error<RawError>(raw_error_code::index_out_of_bounds,
  38. "Invalid module index");
  39. auto Modi = Modules.getModuleDescriptor(Index);
  40. ModuleName = Modi.getModuleName();
  41. uint16_t ModiStream = Modi.getModuleStreamIndex();
  42. if (ModiStream == kInvalidStreamIndex)
  43. return make_error<RawError>(raw_error_code::no_stream,
  44. "Module stream not present");
  45. auto ModStreamData = File.createIndexedStream(ModiStream);
  46. ModuleDebugStreamRef ModS(Modi, std::move(ModStreamData));
  47. if (auto EC = ModS.reload())
  48. return make_error<RawError>(raw_error_code::corrupt_file,
  49. "Invalid module stream");
  50. return std::move(ModS);
  51. }
  52. static inline bool isCodeViewDebugSubsection(object::SectionRef Section,
  53. StringRef Name,
  54. BinaryStreamReader &Reader) {
  55. StringRef SectionName;
  56. if (Section.getName(SectionName))
  57. return false;
  58. if (SectionName != Name)
  59. return false;
  60. Expected<StringRef> ContentsOrErr = Section.getContents();
  61. if (!ContentsOrErr) {
  62. consumeError(ContentsOrErr.takeError());
  63. return false;
  64. }
  65. Reader = BinaryStreamReader(*ContentsOrErr, support::little);
  66. uint32_t Magic;
  67. if (Reader.bytesRemaining() < sizeof(uint32_t))
  68. return false;
  69. cantFail(Reader.readInteger(Magic));
  70. if (Magic != COFF::DEBUG_SECTION_MAGIC)
  71. return false;
  72. return true;
  73. }
  74. static inline bool isDebugSSection(object::SectionRef Section,
  75. DebugSubsectionArray &Subsections) {
  76. BinaryStreamReader Reader;
  77. if (!isCodeViewDebugSubsection(Section, ".debug$S", Reader))
  78. return false;
  79. cantFail(Reader.readArray(Subsections, Reader.bytesRemaining()));
  80. return true;
  81. }
  82. static bool isDebugTSection(SectionRef Section, CVTypeArray &Types) {
  83. BinaryStreamReader Reader;
  84. if (!isCodeViewDebugSubsection(Section, ".debug$T", Reader) &&
  85. !isCodeViewDebugSubsection(Section, ".debug$P", Reader))
  86. return false;
  87. cantFail(Reader.readArray(Types, Reader.bytesRemaining()));
  88. return true;
  89. }
  90. static std::string formatChecksumKind(FileChecksumKind Kind) {
  91. switch (Kind) {
  92. RETURN_CASE(FileChecksumKind, None, "None");
  93. RETURN_CASE(FileChecksumKind, MD5, "MD5");
  94. RETURN_CASE(FileChecksumKind, SHA1, "SHA-1");
  95. RETURN_CASE(FileChecksumKind, SHA256, "SHA-256");
  96. }
  97. return formatUnknownEnum(Kind);
  98. }
  99. template <typename... Args>
  100. static void formatInternal(LinePrinter &Printer, bool Append, Args &&... args) {
  101. if (Append)
  102. Printer.format(std::forward<Args>(args)...);
  103. else
  104. Printer.formatLine(std::forward<Args>(args)...);
  105. }
  106. SymbolGroup::SymbolGroup(InputFile *File, uint32_t GroupIndex) : File(File) {
  107. if (!File)
  108. return;
  109. if (File->isPdb())
  110. initializeForPdb(GroupIndex);
  111. else {
  112. Name = ".debug$S";
  113. uint32_t I = 0;
  114. for (const auto &S : File->obj().sections()) {
  115. DebugSubsectionArray SS;
  116. if (!isDebugSSection(S, SS))
  117. continue;
  118. if (!SC.hasChecksums() || !SC.hasStrings())
  119. SC.initialize(SS);
  120. if (I == GroupIndex)
  121. Subsections = SS;
  122. if (SC.hasChecksums() && SC.hasStrings())
  123. break;
  124. }
  125. rebuildChecksumMap();
  126. }
  127. }
  128. StringRef SymbolGroup::name() const { return Name; }
  129. void SymbolGroup::updateDebugS(const codeview::DebugSubsectionArray &SS) {
  130. Subsections = SS;
  131. }
  132. void SymbolGroup::updatePdbModi(uint32_t Modi) { initializeForPdb(Modi); }
  133. void SymbolGroup::initializeForPdb(uint32_t Modi) {
  134. assert(File && File->isPdb());
  135. // PDB always uses the same string table, but each module has its own
  136. // checksums. So we only set the strings if they're not already set.
  137. if (!SC.hasStrings()) {
  138. auto StringTable = File->pdb().getStringTable();
  139. if (StringTable)
  140. SC.setStrings(StringTable->getStringTable());
  141. else
  142. consumeError(StringTable.takeError());
  143. }
  144. SC.resetChecksums();
  145. auto MDS = getModuleDebugStream(File->pdb(), Name, Modi);
  146. if (!MDS) {
  147. consumeError(MDS.takeError());
  148. return;
  149. }
  150. DebugStream = std::make_shared<ModuleDebugStreamRef>(std::move(*MDS));
  151. Subsections = DebugStream->getSubsectionsArray();
  152. SC.initialize(Subsections);
  153. rebuildChecksumMap();
  154. }
  155. void SymbolGroup::rebuildChecksumMap() {
  156. if (!SC.hasChecksums())
  157. return;
  158. for (const auto &Entry : SC.checksums()) {
  159. auto S = SC.strings().getString(Entry.FileNameOffset);
  160. if (!S)
  161. continue;
  162. ChecksumsByFile[*S] = Entry;
  163. }
  164. }
  165. const ModuleDebugStreamRef &SymbolGroup::getPdbModuleStream() const {
  166. assert(File && File->isPdb() && DebugStream);
  167. return *DebugStream;
  168. }
  169. Expected<StringRef> SymbolGroup::getNameFromStringTable(uint32_t Offset) const {
  170. return SC.strings().getString(Offset);
  171. }
  172. void SymbolGroup::formatFromFileName(LinePrinter &Printer, StringRef File,
  173. bool Append) const {
  174. auto FC = ChecksumsByFile.find(File);
  175. if (FC == ChecksumsByFile.end()) {
  176. formatInternal(Printer, Append, "- (no checksum) {0}", File);
  177. return;
  178. }
  179. formatInternal(Printer, Append, "- ({0}: {1}) {2}",
  180. formatChecksumKind(FC->getValue().Kind),
  181. toHex(FC->getValue().Checksum), File);
  182. }
  183. void SymbolGroup::formatFromChecksumsOffset(LinePrinter &Printer,
  184. uint32_t Offset,
  185. bool Append) const {
  186. if (!SC.hasChecksums()) {
  187. formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
  188. return;
  189. }
  190. auto Iter = SC.checksums().getArray().at(Offset);
  191. if (Iter == SC.checksums().getArray().end()) {
  192. formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
  193. return;
  194. }
  195. uint32_t FO = Iter->FileNameOffset;
  196. auto ExpectedFile = getNameFromStringTable(FO);
  197. if (!ExpectedFile) {
  198. formatInternal(Printer, Append, "(unknown file name offset {0})", Offset);
  199. consumeError(ExpectedFile.takeError());
  200. return;
  201. }
  202. if (Iter->Kind == FileChecksumKind::None) {
  203. formatInternal(Printer, Append, "{0} (no checksum)", *ExpectedFile);
  204. } else {
  205. formatInternal(Printer, Append, "{0} ({1}: {2})", *ExpectedFile,
  206. formatChecksumKind(Iter->Kind), toHex(Iter->Checksum));
  207. }
  208. }
  209. Expected<InputFile> InputFile::open(StringRef Path, bool AllowUnknownFile) {
  210. InputFile IF;
  211. if (!llvm::sys::fs::exists(Path))
  212. return make_error<StringError>(formatv("File {0} not found", Path),
  213. inconvertibleErrorCode());
  214. file_magic Magic;
  215. if (auto EC = identify_magic(Path, Magic))
  216. return make_error<StringError>(
  217. formatv("Unable to identify file type for file {0}", Path), EC);
  218. if (Magic == file_magic::coff_object) {
  219. Expected<OwningBinary<Binary>> BinaryOrErr = createBinary(Path);
  220. if (!BinaryOrErr)
  221. return BinaryOrErr.takeError();
  222. IF.CoffObject = std::move(*BinaryOrErr);
  223. IF.PdbOrObj = llvm::cast<COFFObjectFile>(IF.CoffObject.getBinary());
  224. return std::move(IF);
  225. }
  226. if (Magic == file_magic::pdb) {
  227. std::unique_ptr<IPDBSession> Session;
  228. if (auto Err = loadDataForPDB(PDB_ReaderType::Native, Path, Session))
  229. return std::move(Err);
  230. IF.PdbSession.reset(static_cast<NativeSession *>(Session.release()));
  231. IF.PdbOrObj = &IF.PdbSession->getPDBFile();
  232. return std::move(IF);
  233. }
  234. if (!AllowUnknownFile)
  235. return make_error<StringError>(
  236. formatv("File {0} is not a supported file type", Path),
  237. inconvertibleErrorCode());
  238. auto Result = MemoryBuffer::getFile(Path, -1LL, false);
  239. if (!Result)
  240. return make_error<StringError>(
  241. formatv("File {0} could not be opened", Path), Result.getError());
  242. IF.UnknownFile = std::move(*Result);
  243. IF.PdbOrObj = IF.UnknownFile.get();
  244. return std::move(IF);
  245. }
  246. PDBFile &InputFile::pdb() {
  247. assert(isPdb());
  248. return *PdbOrObj.get<PDBFile *>();
  249. }
  250. const PDBFile &InputFile::pdb() const {
  251. assert(isPdb());
  252. return *PdbOrObj.get<PDBFile *>();
  253. }
  254. object::COFFObjectFile &InputFile::obj() {
  255. assert(isObj());
  256. return *PdbOrObj.get<object::COFFObjectFile *>();
  257. }
  258. const object::COFFObjectFile &InputFile::obj() const {
  259. assert(isObj());
  260. return *PdbOrObj.get<object::COFFObjectFile *>();
  261. }
  262. MemoryBuffer &InputFile::unknown() {
  263. assert(isUnknown());
  264. return *PdbOrObj.get<MemoryBuffer *>();
  265. }
  266. const MemoryBuffer &InputFile::unknown() const {
  267. assert(isUnknown());
  268. return *PdbOrObj.get<MemoryBuffer *>();
  269. }
  270. StringRef InputFile::getFilePath() const {
  271. if (isPdb())
  272. return pdb().getFilePath();
  273. if (isObj())
  274. return obj().getFileName();
  275. assert(isUnknown());
  276. return unknown().getBufferIdentifier();
  277. }
  278. bool InputFile::hasTypes() const {
  279. if (isPdb())
  280. return pdb().hasPDBTpiStream();
  281. for (const auto &Section : obj().sections()) {
  282. CVTypeArray Types;
  283. if (isDebugTSection(Section, Types))
  284. return true;
  285. }
  286. return false;
  287. }
  288. bool InputFile::hasIds() const {
  289. if (isObj())
  290. return false;
  291. return pdb().hasPDBIpiStream();
  292. }
  293. bool InputFile::isPdb() const { return PdbOrObj.is<PDBFile *>(); }
  294. bool InputFile::isObj() const {
  295. return PdbOrObj.is<object::COFFObjectFile *>();
  296. }
  297. bool InputFile::isUnknown() const { return PdbOrObj.is<MemoryBuffer *>(); }
  298. codeview::LazyRandomTypeCollection &
  299. InputFile::getOrCreateTypeCollection(TypeCollectionKind Kind) {
  300. if (Types && Kind == kTypes)
  301. return *Types;
  302. if (Ids && Kind == kIds)
  303. return *Ids;
  304. if (Kind == kIds) {
  305. assert(isPdb() && pdb().hasPDBIpiStream());
  306. }
  307. // If the collection was already initialized, we should have just returned it
  308. // in step 1.
  309. if (isPdb()) {
  310. TypeCollectionPtr &Collection = (Kind == kIds) ? Ids : Types;
  311. auto &Stream = cantFail((Kind == kIds) ? pdb().getPDBIpiStream()
  312. : pdb().getPDBTpiStream());
  313. auto &Array = Stream.typeArray();
  314. uint32_t Count = Stream.getNumTypeRecords();
  315. auto Offsets = Stream.getTypeIndexOffsets();
  316. Collection =
  317. llvm::make_unique<LazyRandomTypeCollection>(Array, Count, Offsets);
  318. return *Collection;
  319. }
  320. assert(isObj());
  321. assert(Kind == kTypes);
  322. assert(!Types);
  323. for (const auto &Section : obj().sections()) {
  324. CVTypeArray Records;
  325. if (!isDebugTSection(Section, Records))
  326. continue;
  327. Types = llvm::make_unique<LazyRandomTypeCollection>(Records, 100);
  328. return *Types;
  329. }
  330. Types = llvm::make_unique<LazyRandomTypeCollection>(100);
  331. return *Types;
  332. }
  333. codeview::LazyRandomTypeCollection &InputFile::types() {
  334. return getOrCreateTypeCollection(kTypes);
  335. }
  336. codeview::LazyRandomTypeCollection &InputFile::ids() {
  337. // Object files have only one type stream that contains both types and ids.
  338. // Similarly, some PDBs don't contain an IPI stream, and for those both types
  339. // and IDs are in the same stream.
  340. if (isObj() || !pdb().hasPDBIpiStream())
  341. return types();
  342. return getOrCreateTypeCollection(kIds);
  343. }
  344. iterator_range<SymbolGroupIterator> InputFile::symbol_groups() {
  345. return make_range<SymbolGroupIterator>(symbol_groups_begin(),
  346. symbol_groups_end());
  347. }
  348. SymbolGroupIterator InputFile::symbol_groups_begin() {
  349. return SymbolGroupIterator(*this);
  350. }
  351. SymbolGroupIterator InputFile::symbol_groups_end() {
  352. return SymbolGroupIterator();
  353. }
  354. SymbolGroupIterator::SymbolGroupIterator() : Value(nullptr) {}
  355. SymbolGroupIterator::SymbolGroupIterator(InputFile &File) : Value(&File) {
  356. if (File.isObj()) {
  357. SectionIter = File.obj().section_begin();
  358. scanToNextDebugS();
  359. }
  360. }
  361. bool SymbolGroupIterator::operator==(const SymbolGroupIterator &R) const {
  362. bool E = isEnd();
  363. bool RE = R.isEnd();
  364. if (E || RE)
  365. return E == RE;
  366. if (Value.File != R.Value.File)
  367. return false;
  368. return Index == R.Index;
  369. }
  370. const SymbolGroup &SymbolGroupIterator::operator*() const {
  371. assert(!isEnd());
  372. return Value;
  373. }
  374. SymbolGroup &SymbolGroupIterator::operator*() {
  375. assert(!isEnd());
  376. return Value;
  377. }
  378. SymbolGroupIterator &SymbolGroupIterator::operator++() {
  379. assert(Value.File && !isEnd());
  380. ++Index;
  381. if (isEnd())
  382. return *this;
  383. if (Value.File->isPdb()) {
  384. Value.updatePdbModi(Index);
  385. return *this;
  386. }
  387. scanToNextDebugS();
  388. return *this;
  389. }
  390. void SymbolGroupIterator::scanToNextDebugS() {
  391. assert(SectionIter.hasValue());
  392. auto End = Value.File->obj().section_end();
  393. auto &Iter = *SectionIter;
  394. assert(!isEnd());
  395. while (++Iter != End) {
  396. DebugSubsectionArray SS;
  397. SectionRef SR = *Iter;
  398. if (!isDebugSSection(SR, SS))
  399. continue;
  400. Value.updateDebugS(SS);
  401. return;
  402. }
  403. }
  404. bool SymbolGroupIterator::isEnd() const {
  405. if (!Value.File)
  406. return true;
  407. if (Value.File->isPdb()) {
  408. auto &Dbi = cantFail(Value.File->pdb().getPDBDbiStream());
  409. uint32_t Count = Dbi.modules().getModuleCount();
  410. assert(Index <= Count);
  411. return Index == Count;
  412. }
  413. assert(SectionIter.hasValue());
  414. return *SectionIter == Value.File->obj().section_end();
  415. }