Symbolize.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509
  1. //===-- LLVMSymbolize.cpp -------------------------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // Implementation for LLVM symbolization library.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "llvm/DebugInfo/Symbolize/Symbolize.h"
  13. #include "SymbolizableObjectFile.h"
  14. #include "llvm/ADT/STLExtras.h"
  15. #include "llvm/BinaryFormat/COFF.h"
  16. #include "llvm/DebugInfo/DWARF/DWARFContext.h"
  17. #include "llvm/DebugInfo/PDB/PDB.h"
  18. #include "llvm/DebugInfo/PDB/PDBContext.h"
  19. #include "llvm/Demangle/Demangle.h"
  20. #include "llvm/Object/COFF.h"
  21. #include "llvm/Object/MachO.h"
  22. #include "llvm/Object/MachOUniversal.h"
  23. #include "llvm/Support/Casting.h"
  24. #include "llvm/Support/Compression.h"
  25. #include "llvm/Support/DataExtractor.h"
  26. #include "llvm/Support/Errc.h"
  27. #include "llvm/Support/FileSystem.h"
  28. #include "llvm/Support/MemoryBuffer.h"
  29. #include "llvm/Support/Path.h"
  30. #include <algorithm>
  31. #include <cassert>
  32. #include <cstring>
  33. #if defined(_MSC_VER)
  34. #include <Windows.h>
  35. // This must be included after windows.h.
  36. #include <DbgHelp.h>
  37. #pragma comment(lib, "dbghelp.lib")
  38. // Windows.h conflicts with our COFF header definitions.
  39. #ifdef IMAGE_FILE_MACHINE_I386
  40. #undef IMAGE_FILE_MACHINE_I386
  41. #endif
  42. #endif
  43. namespace llvm {
  44. namespace symbolize {
  45. Expected<DILineInfo>
  46. LLVMSymbolizer::symbolizeCode(const std::string &ModuleName,
  47. object::SectionedAddress ModuleOffset,
  48. StringRef DWPName) {
  49. SymbolizableModule *Info;
  50. if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName, DWPName))
  51. Info = InfoOrErr.get();
  52. else
  53. return InfoOrErr.takeError();
  54. // A null module means an error has already been reported. Return an empty
  55. // result.
  56. if (!Info)
  57. return DILineInfo();
  58. // If the user is giving us relative addresses, add the preferred base of the
  59. // object to the offset before we do the query. It's what DIContext expects.
  60. if (Opts.RelativeAddresses)
  61. ModuleOffset.Address += Info->getModulePreferredBase();
  62. DILineInfo LineInfo = Info->symbolizeCode(ModuleOffset, Opts.PrintFunctions,
  63. Opts.UseSymbolTable);
  64. if (Opts.Demangle)
  65. LineInfo.FunctionName = DemangleName(LineInfo.FunctionName, Info);
  66. return LineInfo;
  67. }
  68. Expected<DIInliningInfo>
  69. LLVMSymbolizer::symbolizeInlinedCode(const std::string &ModuleName,
  70. object::SectionedAddress ModuleOffset,
  71. StringRef DWPName) {
  72. SymbolizableModule *Info;
  73. if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName, DWPName))
  74. Info = InfoOrErr.get();
  75. else
  76. return InfoOrErr.takeError();
  77. // A null module means an error has already been reported. Return an empty
  78. // result.
  79. if (!Info)
  80. return DIInliningInfo();
  81. // If the user is giving us relative addresses, add the preferred base of the
  82. // object to the offset before we do the query. It's what DIContext expects.
  83. if (Opts.RelativeAddresses)
  84. ModuleOffset.Address += Info->getModulePreferredBase();
  85. DIInliningInfo InlinedContext = Info->symbolizeInlinedCode(
  86. ModuleOffset, Opts.PrintFunctions, Opts.UseSymbolTable);
  87. if (Opts.Demangle) {
  88. for (int i = 0, n = InlinedContext.getNumberOfFrames(); i < n; i++) {
  89. auto *Frame = InlinedContext.getMutableFrame(i);
  90. Frame->FunctionName = DemangleName(Frame->FunctionName, Info);
  91. }
  92. }
  93. return InlinedContext;
  94. }
  95. Expected<DIGlobal>
  96. LLVMSymbolizer::symbolizeData(const std::string &ModuleName,
  97. object::SectionedAddress ModuleOffset) {
  98. SymbolizableModule *Info;
  99. if (auto InfoOrErr = getOrCreateModuleInfo(ModuleName))
  100. Info = InfoOrErr.get();
  101. else
  102. return InfoOrErr.takeError();
  103. // A null module means an error has already been reported. Return an empty
  104. // result.
  105. if (!Info)
  106. return DIGlobal();
  107. // If the user is giving us relative addresses, add the preferred base of
  108. // the object to the offset before we do the query. It's what DIContext
  109. // expects.
  110. if (Opts.RelativeAddresses)
  111. ModuleOffset.Address += Info->getModulePreferredBase();
  112. DIGlobal Global = Info->symbolizeData(ModuleOffset);
  113. if (Opts.Demangle)
  114. Global.Name = DemangleName(Global.Name, Info);
  115. return Global;
  116. }
  117. void LLVMSymbolizer::flush() {
  118. ObjectForUBPathAndArch.clear();
  119. BinaryForPath.clear();
  120. ObjectPairForPathArch.clear();
  121. Modules.clear();
  122. }
  123. namespace {
  124. // For Path="/path/to/foo" and Basename="foo" assume that debug info is in
  125. // /path/to/foo.dSYM/Contents/Resources/DWARF/foo.
  126. // For Path="/path/to/bar.dSYM" and Basename="foo" assume that debug info is in
  127. // /path/to/bar.dSYM/Contents/Resources/DWARF/foo.
  128. std::string getDarwinDWARFResourceForPath(
  129. const std::string &Path, const std::string &Basename) {
  130. SmallString<16> ResourceName = StringRef(Path);
  131. if (sys::path::extension(Path) != ".dSYM") {
  132. ResourceName += ".dSYM";
  133. }
  134. sys::path::append(ResourceName, "Contents", "Resources", "DWARF");
  135. sys::path::append(ResourceName, Basename);
  136. return ResourceName.str();
  137. }
  138. bool checkFileCRC(StringRef Path, uint32_t CRCHash) {
  139. ErrorOr<std::unique_ptr<MemoryBuffer>> MB =
  140. MemoryBuffer::getFileOrSTDIN(Path);
  141. if (!MB)
  142. return false;
  143. return !zlib::isAvailable() || CRCHash == zlib::crc32(MB.get()->getBuffer());
  144. }
  145. bool findDebugBinary(const std::string &OrigPath,
  146. const std::string &DebuglinkName, uint32_t CRCHash,
  147. const std::string &FallbackDebugPath,
  148. std::string &Result) {
  149. SmallString<16> OrigDir(OrigPath);
  150. llvm::sys::path::remove_filename(OrigDir);
  151. SmallString<16> DebugPath = OrigDir;
  152. // Try relative/path/to/original_binary/debuglink_name
  153. llvm::sys::path::append(DebugPath, DebuglinkName);
  154. if (checkFileCRC(DebugPath, CRCHash)) {
  155. Result = DebugPath.str();
  156. return true;
  157. }
  158. // Try relative/path/to/original_binary/.debug/debuglink_name
  159. DebugPath = OrigDir;
  160. llvm::sys::path::append(DebugPath, ".debug", DebuglinkName);
  161. if (checkFileCRC(DebugPath, CRCHash)) {
  162. Result = DebugPath.str();
  163. return true;
  164. }
  165. // Make the path absolute so that lookups will go to
  166. // "/usr/lib/debug/full/path/to/debug", not
  167. // "/usr/lib/debug/to/debug"
  168. llvm::sys::fs::make_absolute(OrigDir);
  169. if (!FallbackDebugPath.empty()) {
  170. // Try <FallbackDebugPath>/absolute/path/to/original_binary/debuglink_name
  171. DebugPath = FallbackDebugPath;
  172. } else {
  173. #if defined(__NetBSD__)
  174. // Try /usr/libdata/debug/absolute/path/to/original_binary/debuglink_name
  175. DebugPath = "/usr/libdata/debug";
  176. #else
  177. // Try /usr/lib/debug/absolute/path/to/original_binary/debuglink_name
  178. DebugPath = "/usr/lib/debug";
  179. #endif
  180. }
  181. llvm::sys::path::append(DebugPath, llvm::sys::path::relative_path(OrigDir),
  182. DebuglinkName);
  183. if (checkFileCRC(DebugPath, CRCHash)) {
  184. Result = DebugPath.str();
  185. return true;
  186. }
  187. return false;
  188. }
  189. bool getGNUDebuglinkContents(const ObjectFile *Obj, std::string &DebugName,
  190. uint32_t &CRCHash) {
  191. if (!Obj)
  192. return false;
  193. for (const SectionRef &Section : Obj->sections()) {
  194. StringRef Name;
  195. Section.getName(Name);
  196. Name = Name.substr(Name.find_first_not_of("._"));
  197. if (Name == "gnu_debuglink") {
  198. StringRef Data;
  199. Section.getContents(Data);
  200. DataExtractor DE(Data, Obj->isLittleEndian(), 0);
  201. uint32_t Offset = 0;
  202. if (const char *DebugNameStr = DE.getCStr(&Offset)) {
  203. // 4-byte align the offset.
  204. Offset = (Offset + 3) & ~0x3;
  205. if (DE.isValidOffsetForDataOfSize(Offset, 4)) {
  206. DebugName = DebugNameStr;
  207. CRCHash = DE.getU32(&Offset);
  208. return true;
  209. }
  210. }
  211. break;
  212. }
  213. }
  214. return false;
  215. }
  216. bool darwinDsymMatchesBinary(const MachOObjectFile *DbgObj,
  217. const MachOObjectFile *Obj) {
  218. ArrayRef<uint8_t> dbg_uuid = DbgObj->getUuid();
  219. ArrayRef<uint8_t> bin_uuid = Obj->getUuid();
  220. if (dbg_uuid.empty() || bin_uuid.empty())
  221. return false;
  222. return !memcmp(dbg_uuid.data(), bin_uuid.data(), dbg_uuid.size());
  223. }
  224. } // end anonymous namespace
  225. ObjectFile *LLVMSymbolizer::lookUpDsymFile(const std::string &ExePath,
  226. const MachOObjectFile *MachExeObj, const std::string &ArchName) {
  227. // On Darwin we may find DWARF in separate object file in
  228. // resource directory.
  229. std::vector<std::string> DsymPaths;
  230. StringRef Filename = sys::path::filename(ExePath);
  231. DsymPaths.push_back(getDarwinDWARFResourceForPath(ExePath, Filename));
  232. for (const auto &Path : Opts.DsymHints) {
  233. DsymPaths.push_back(getDarwinDWARFResourceForPath(Path, Filename));
  234. }
  235. for (const auto &Path : DsymPaths) {
  236. auto DbgObjOrErr = getOrCreateObject(Path, ArchName);
  237. if (!DbgObjOrErr) {
  238. // Ignore errors, the file might not exist.
  239. consumeError(DbgObjOrErr.takeError());
  240. continue;
  241. }
  242. ObjectFile *DbgObj = DbgObjOrErr.get();
  243. if (!DbgObj)
  244. continue;
  245. const MachOObjectFile *MachDbgObj = dyn_cast<const MachOObjectFile>(DbgObj);
  246. if (!MachDbgObj)
  247. continue;
  248. if (darwinDsymMatchesBinary(MachDbgObj, MachExeObj))
  249. return DbgObj;
  250. }
  251. return nullptr;
  252. }
  253. ObjectFile *LLVMSymbolizer::lookUpDebuglinkObject(const std::string &Path,
  254. const ObjectFile *Obj,
  255. const std::string &ArchName) {
  256. std::string DebuglinkName;
  257. uint32_t CRCHash;
  258. std::string DebugBinaryPath;
  259. if (!getGNUDebuglinkContents(Obj, DebuglinkName, CRCHash))
  260. return nullptr;
  261. if (!findDebugBinary(Path, DebuglinkName, CRCHash, Opts.FallbackDebugPath,
  262. DebugBinaryPath))
  263. return nullptr;
  264. auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName);
  265. if (!DbgObjOrErr) {
  266. // Ignore errors, the file might not exist.
  267. consumeError(DbgObjOrErr.takeError());
  268. return nullptr;
  269. }
  270. return DbgObjOrErr.get();
  271. }
  272. Expected<LLVMSymbolizer::ObjectPair>
  273. LLVMSymbolizer::getOrCreateObjectPair(const std::string &Path,
  274. const std::string &ArchName) {
  275. const auto &I = ObjectPairForPathArch.find(std::make_pair(Path, ArchName));
  276. if (I != ObjectPairForPathArch.end()) {
  277. return I->second;
  278. }
  279. auto ObjOrErr = getOrCreateObject(Path, ArchName);
  280. if (!ObjOrErr) {
  281. ObjectPairForPathArch.insert(std::make_pair(std::make_pair(Path, ArchName),
  282. ObjectPair(nullptr, nullptr)));
  283. return ObjOrErr.takeError();
  284. }
  285. ObjectFile *Obj = ObjOrErr.get();
  286. assert(Obj != nullptr);
  287. ObjectFile *DbgObj = nullptr;
  288. if (auto MachObj = dyn_cast<const MachOObjectFile>(Obj))
  289. DbgObj = lookUpDsymFile(Path, MachObj, ArchName);
  290. if (!DbgObj)
  291. DbgObj = lookUpDebuglinkObject(Path, Obj, ArchName);
  292. if (!DbgObj)
  293. DbgObj = Obj;
  294. ObjectPair Res = std::make_pair(Obj, DbgObj);
  295. ObjectPairForPathArch.insert(
  296. std::make_pair(std::make_pair(Path, ArchName), Res));
  297. return Res;
  298. }
  299. Expected<ObjectFile *>
  300. LLVMSymbolizer::getOrCreateObject(const std::string &Path,
  301. const std::string &ArchName) {
  302. const auto &I = BinaryForPath.find(Path);
  303. Binary *Bin = nullptr;
  304. if (I == BinaryForPath.end()) {
  305. Expected<OwningBinary<Binary>> BinOrErr = createBinary(Path);
  306. if (!BinOrErr) {
  307. BinaryForPath.insert(std::make_pair(Path, OwningBinary<Binary>()));
  308. return BinOrErr.takeError();
  309. }
  310. Bin = BinOrErr->getBinary();
  311. BinaryForPath.insert(std::make_pair(Path, std::move(BinOrErr.get())));
  312. } else {
  313. Bin = I->second.getBinary();
  314. }
  315. if (!Bin)
  316. return static_cast<ObjectFile *>(nullptr);
  317. if (MachOUniversalBinary *UB = dyn_cast_or_null<MachOUniversalBinary>(Bin)) {
  318. const auto &I = ObjectForUBPathAndArch.find(std::make_pair(Path, ArchName));
  319. if (I != ObjectForUBPathAndArch.end()) {
  320. return I->second.get();
  321. }
  322. Expected<std::unique_ptr<ObjectFile>> ObjOrErr =
  323. UB->getObjectForArch(ArchName);
  324. if (!ObjOrErr) {
  325. ObjectForUBPathAndArch.insert(std::make_pair(
  326. std::make_pair(Path, ArchName), std::unique_ptr<ObjectFile>()));
  327. return ObjOrErr.takeError();
  328. }
  329. ObjectFile *Res = ObjOrErr->get();
  330. ObjectForUBPathAndArch.insert(std::make_pair(std::make_pair(Path, ArchName),
  331. std::move(ObjOrErr.get())));
  332. return Res;
  333. }
  334. if (Bin->isObject()) {
  335. return cast<ObjectFile>(Bin);
  336. }
  337. return errorCodeToError(object_error::arch_not_found);
  338. }
  339. Expected<SymbolizableModule *>
  340. LLVMSymbolizer::getOrCreateModuleInfo(const std::string &ModuleName,
  341. StringRef DWPName) {
  342. const auto &I = Modules.find(ModuleName);
  343. if (I != Modules.end()) {
  344. return I->second.get();
  345. }
  346. std::string BinaryName = ModuleName;
  347. std::string ArchName = Opts.DefaultArch;
  348. size_t ColonPos = ModuleName.find_last_of(':');
  349. // Verify that substring after colon form a valid arch name.
  350. if (ColonPos != std::string::npos) {
  351. std::string ArchStr = ModuleName.substr(ColonPos + 1);
  352. if (Triple(ArchStr).getArch() != Triple::UnknownArch) {
  353. BinaryName = ModuleName.substr(0, ColonPos);
  354. ArchName = ArchStr;
  355. }
  356. }
  357. auto ObjectsOrErr = getOrCreateObjectPair(BinaryName, ArchName);
  358. if (!ObjectsOrErr) {
  359. // Failed to find valid object file.
  360. Modules.insert(
  361. std::make_pair(ModuleName, std::unique_ptr<SymbolizableModule>()));
  362. return ObjectsOrErr.takeError();
  363. }
  364. ObjectPair Objects = ObjectsOrErr.get();
  365. std::unique_ptr<DIContext> Context;
  366. // If this is a COFF object containing PDB info, use a PDBContext to
  367. // symbolize. Otherwise, use DWARF.
  368. if (auto CoffObject = dyn_cast<COFFObjectFile>(Objects.first)) {
  369. const codeview::DebugInfo *DebugInfo;
  370. StringRef PDBFileName;
  371. auto EC = CoffObject->getDebugPDBInfo(DebugInfo, PDBFileName);
  372. if (!EC && DebugInfo != nullptr && !PDBFileName.empty()) {
  373. using namespace pdb;
  374. std::unique_ptr<IPDBSession> Session;
  375. if (auto Err = loadDataForEXE(PDB_ReaderType::DIA,
  376. Objects.first->getFileName(), Session)) {
  377. Modules.insert(
  378. std::make_pair(ModuleName, std::unique_ptr<SymbolizableModule>()));
  379. // Return along the PDB filename to provide more context
  380. return createFileError(PDBFileName, std::move(Err));
  381. }
  382. Context.reset(new PDBContext(*CoffObject, std::move(Session)));
  383. }
  384. }
  385. if (!Context)
  386. Context = DWARFContext::create(*Objects.second, nullptr,
  387. DWARFContext::defaultErrorHandler, DWPName);
  388. assert(Context);
  389. auto InfoOrErr =
  390. SymbolizableObjectFile::create(Objects.first, std::move(Context));
  391. std::unique_ptr<SymbolizableModule> SymMod;
  392. if (InfoOrErr)
  393. SymMod = std::move(InfoOrErr.get());
  394. auto InsertResult =
  395. Modules.insert(std::make_pair(ModuleName, std::move(SymMod)));
  396. assert(InsertResult.second);
  397. if (auto EC = InfoOrErr.getError())
  398. return errorCodeToError(EC);
  399. return InsertResult.first->second.get();
  400. }
  401. namespace {
  402. // Undo these various manglings for Win32 extern "C" functions:
  403. // cdecl - _foo
  404. // stdcall - _foo@12
  405. // fastcall - @foo@12
  406. // vectorcall - foo@@12
  407. // These are all different linkage names for 'foo'.
  408. StringRef demanglePE32ExternCFunc(StringRef SymbolName) {
  409. // Remove any '_' or '@' prefix.
  410. char Front = SymbolName.empty() ? '\0' : SymbolName[0];
  411. if (Front == '_' || Front == '@')
  412. SymbolName = SymbolName.drop_front();
  413. // Remove any '@[0-9]+' suffix.
  414. if (Front != '?') {
  415. size_t AtPos = SymbolName.rfind('@');
  416. if (AtPos != StringRef::npos &&
  417. std::all_of(SymbolName.begin() + AtPos + 1, SymbolName.end(),
  418. [](char C) { return C >= '0' && C <= '9'; })) {
  419. SymbolName = SymbolName.substr(0, AtPos);
  420. }
  421. }
  422. // Remove any ending '@' for vectorcall.
  423. if (SymbolName.endswith("@"))
  424. SymbolName = SymbolName.drop_back();
  425. return SymbolName;
  426. }
  427. } // end anonymous namespace
  428. std::string
  429. LLVMSymbolizer::DemangleName(const std::string &Name,
  430. const SymbolizableModule *DbiModuleDescriptor) {
  431. // We can spoil names of symbols with C linkage, so use an heuristic
  432. // approach to check if the name should be demangled.
  433. if (Name.substr(0, 2) == "_Z") {
  434. int status = 0;
  435. char *DemangledName = itaniumDemangle(Name.c_str(), nullptr, nullptr, &status);
  436. if (status != 0)
  437. return Name;
  438. std::string Result = DemangledName;
  439. free(DemangledName);
  440. return Result;
  441. }
  442. #if defined(_MSC_VER)
  443. if (!Name.empty() && Name.front() == '?') {
  444. // Only do MSVC C++ demangling on symbols starting with '?'.
  445. char DemangledName[1024] = {0};
  446. DWORD result = ::UnDecorateSymbolName(
  447. Name.c_str(), DemangledName, 1023,
  448. UNDNAME_NO_ACCESS_SPECIFIERS | // Strip public, private, protected
  449. UNDNAME_NO_ALLOCATION_LANGUAGE | // Strip __thiscall, __stdcall, etc
  450. UNDNAME_NO_THROW_SIGNATURES | // Strip throw() specifications
  451. UNDNAME_NO_MEMBER_TYPE | // Strip virtual, static, etc specifiers
  452. UNDNAME_NO_MS_KEYWORDS | // Strip all MS extension keywords
  453. UNDNAME_NO_FUNCTION_RETURNS); // Strip function return types
  454. return (result == 0) ? Name : std::string(DemangledName);
  455. }
  456. #endif
  457. if (DbiModuleDescriptor && DbiModuleDescriptor->isWin32Module())
  458. return std::string(demanglePE32ExternCFunc(Name));
  459. return Name;
  460. }
  461. } // namespace symbolize
  462. } // namespace llvm