SymbolizableObjectFile.cpp 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321
  1. //===- SymbolizableObjectFile.cpp -----------------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // Implementation of SymbolizableObjectFile class.
  10. //
  11. //===----------------------------------------------------------------------===//
  12. #include "SymbolizableObjectFile.h"
  13. #include "llvm/ADT/STLExtras.h"
  14. #include "llvm/ADT/StringRef.h"
  15. #include "llvm/ADT/Triple.h"
  16. #include "llvm/BinaryFormat/COFF.h"
  17. #include "llvm/DebugInfo/DWARF/DWARFContext.h"
  18. #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
  19. #include "llvm/Object/COFF.h"
  20. #include "llvm/Object/ObjectFile.h"
  21. #include "llvm/Object/SymbolSize.h"
  22. #include "llvm/Support/Casting.h"
  23. #include "llvm/Support/DataExtractor.h"
  24. #include "llvm/Support/Error.h"
  25. #include <algorithm>
  26. #include <cstdint>
  27. #include <memory>
  28. #include <string>
  29. #include <system_error>
  30. #include <utility>
  31. #include <vector>
  32. using namespace llvm;
  33. using namespace object;
  34. using namespace symbolize;
  35. static DILineInfoSpecifier
  36. getDILineInfoSpecifier(FunctionNameKind FNKind) {
  37. return DILineInfoSpecifier(
  38. DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath, FNKind);
  39. }
  40. ErrorOr<std::unique_ptr<SymbolizableObjectFile>>
  41. SymbolizableObjectFile::create(object::ObjectFile *Obj,
  42. std::unique_ptr<DIContext> DICtx) {
  43. std::unique_ptr<SymbolizableObjectFile> res(
  44. new SymbolizableObjectFile(Obj, std::move(DICtx)));
  45. std::unique_ptr<DataExtractor> OpdExtractor;
  46. uint64_t OpdAddress = 0;
  47. // Find the .opd (function descriptor) section if any, for big-endian
  48. // PowerPC64 ELF.
  49. if (Obj->getArch() == Triple::ppc64) {
  50. for (section_iterator Section : Obj->sections()) {
  51. StringRef Name;
  52. if (auto EC = Section->getName(Name))
  53. return EC;
  54. if (Name == ".opd") {
  55. Expected<StringRef> E = Section->getContents();
  56. if (!E)
  57. return errorToErrorCode(E.takeError());
  58. OpdExtractor.reset(new DataExtractor(*E, Obj->isLittleEndian(),
  59. Obj->getBytesInAddress()));
  60. OpdAddress = Section->getAddress();
  61. break;
  62. }
  63. }
  64. }
  65. std::vector<std::pair<SymbolRef, uint64_t>> Symbols =
  66. computeSymbolSizes(*Obj);
  67. for (auto &P : Symbols)
  68. res->addSymbol(P.first, P.second, OpdExtractor.get(), OpdAddress);
  69. // If this is a COFF object and we didn't find any symbols, try the export
  70. // table.
  71. if (Symbols.empty()) {
  72. if (auto *CoffObj = dyn_cast<COFFObjectFile>(Obj))
  73. if (auto EC = res->addCoffExportSymbols(CoffObj))
  74. return EC;
  75. }
  76. std::vector<std::pair<SymbolDesc, StringRef>> &Fs = res->Functions,
  77. &Os = res->Objects;
  78. auto Uniquify = [](std::vector<std::pair<SymbolDesc, StringRef>> &S) {
  79. // Sort by (Addr,Size,Name). If several SymbolDescs share the same Addr,
  80. // pick the one with the largest Size. This helps us avoid symbols with no
  81. // size information (Size=0).
  82. llvm::sort(S);
  83. auto I = S.begin(), E = S.end(), J = S.begin();
  84. while (I != E) {
  85. auto OI = I;
  86. while (++I != E && OI->first.Addr == I->first.Addr) {
  87. }
  88. *J++ = I[-1];
  89. }
  90. S.erase(J, S.end());
  91. };
  92. Uniquify(Fs);
  93. Uniquify(Os);
  94. return std::move(res);
  95. }
  96. SymbolizableObjectFile::SymbolizableObjectFile(ObjectFile *Obj,
  97. std::unique_ptr<DIContext> DICtx)
  98. : Module(Obj), DebugInfoContext(std::move(DICtx)) {}
  99. namespace {
  100. struct OffsetNamePair {
  101. uint32_t Offset;
  102. StringRef Name;
  103. bool operator<(const OffsetNamePair &R) const {
  104. return Offset < R.Offset;
  105. }
  106. };
  107. } // end anonymous namespace
  108. std::error_code SymbolizableObjectFile::addCoffExportSymbols(
  109. const COFFObjectFile *CoffObj) {
  110. // Get all export names and offsets.
  111. std::vector<OffsetNamePair> ExportSyms;
  112. for (const ExportDirectoryEntryRef &Ref : CoffObj->export_directories()) {
  113. StringRef Name;
  114. uint32_t Offset;
  115. if (auto EC = Ref.getSymbolName(Name))
  116. return EC;
  117. if (auto EC = Ref.getExportRVA(Offset))
  118. return EC;
  119. ExportSyms.push_back(OffsetNamePair{Offset, Name});
  120. }
  121. if (ExportSyms.empty())
  122. return std::error_code();
  123. // Sort by ascending offset.
  124. array_pod_sort(ExportSyms.begin(), ExportSyms.end());
  125. // Approximate the symbol sizes by assuming they run to the next symbol.
  126. // FIXME: This assumes all exports are functions.
  127. uint64_t ImageBase = CoffObj->getImageBase();
  128. for (auto I = ExportSyms.begin(), E = ExportSyms.end(); I != E; ++I) {
  129. OffsetNamePair &Export = *I;
  130. // FIXME: The last export has a one byte size now.
  131. uint32_t NextOffset = I != E ? I->Offset : Export.Offset + 1;
  132. uint64_t SymbolStart = ImageBase + Export.Offset;
  133. uint64_t SymbolSize = NextOffset - Export.Offset;
  134. SymbolDesc SD = {SymbolStart, SymbolSize};
  135. Functions.emplace_back(SD, Export.Name);
  136. }
  137. return std::error_code();
  138. }
  139. std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol,
  140. uint64_t SymbolSize,
  141. DataExtractor *OpdExtractor,
  142. uint64_t OpdAddress) {
  143. // Avoid adding symbols from an unknown/undefined section.
  144. const ObjectFile *Obj = Symbol.getObject();
  145. Expected<section_iterator> Sec = Symbol.getSection();
  146. if (!Sec || (Obj && Obj->section_end() == *Sec))
  147. return std::error_code();
  148. Expected<SymbolRef::Type> SymbolTypeOrErr = Symbol.getType();
  149. if (!SymbolTypeOrErr)
  150. return errorToErrorCode(SymbolTypeOrErr.takeError());
  151. SymbolRef::Type SymbolType = *SymbolTypeOrErr;
  152. if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data)
  153. return std::error_code();
  154. Expected<uint64_t> SymbolAddressOrErr = Symbol.getAddress();
  155. if (!SymbolAddressOrErr)
  156. return errorToErrorCode(SymbolAddressOrErr.takeError());
  157. uint64_t SymbolAddress = *SymbolAddressOrErr;
  158. if (OpdExtractor) {
  159. // For big-endian PowerPC64 ELF, symbols in the .opd section refer to
  160. // function descriptors. The first word of the descriptor is a pointer to
  161. // the function's code.
  162. // For the purposes of symbolization, pretend the symbol's address is that
  163. // of the function's code, not the descriptor.
  164. uint64_t OpdOffset = SymbolAddress - OpdAddress;
  165. uint32_t OpdOffset32 = OpdOffset;
  166. if (OpdOffset == OpdOffset32 &&
  167. OpdExtractor->isValidOffsetForAddress(OpdOffset32))
  168. SymbolAddress = OpdExtractor->getAddress(&OpdOffset32);
  169. }
  170. Expected<StringRef> SymbolNameOrErr = Symbol.getName();
  171. if (!SymbolNameOrErr)
  172. return errorToErrorCode(SymbolNameOrErr.takeError());
  173. StringRef SymbolName = *SymbolNameOrErr;
  174. // Mach-O symbol table names have leading underscore, skip it.
  175. if (Module->isMachO() && !SymbolName.empty() && SymbolName[0] == '_')
  176. SymbolName = SymbolName.drop_front();
  177. // FIXME: If a function has alias, there are two entries in symbol table
  178. // with same address size. Make sure we choose the correct one.
  179. auto &M = SymbolType == SymbolRef::ST_Function ? Functions : Objects;
  180. SymbolDesc SD = { SymbolAddress, SymbolSize };
  181. M.emplace_back(SD, SymbolName);
  182. return std::error_code();
  183. }
  184. // Return true if this is a 32-bit x86 PE COFF module.
  185. bool SymbolizableObjectFile::isWin32Module() const {
  186. auto *CoffObject = dyn_cast<COFFObjectFile>(Module);
  187. return CoffObject && CoffObject->getMachine() == COFF::IMAGE_FILE_MACHINE_I386;
  188. }
  189. uint64_t SymbolizableObjectFile::getModulePreferredBase() const {
  190. if (auto *CoffObject = dyn_cast<COFFObjectFile>(Module))
  191. return CoffObject->getImageBase();
  192. return 0;
  193. }
  194. bool SymbolizableObjectFile::getNameFromSymbolTable(SymbolRef::Type Type,
  195. uint64_t Address,
  196. std::string &Name,
  197. uint64_t &Addr,
  198. uint64_t &Size) const {
  199. const auto &Symbols = Type == SymbolRef::ST_Function ? Functions : Objects;
  200. std::pair<SymbolDesc, StringRef> SD{{Address, UINT64_C(-1)}, StringRef()};
  201. auto SymbolIterator = llvm::upper_bound(Symbols, SD);
  202. if (SymbolIterator == Symbols.begin())
  203. return false;
  204. --SymbolIterator;
  205. if (SymbolIterator->first.Size != 0 &&
  206. SymbolIterator->first.Addr + SymbolIterator->first.Size <= Address)
  207. return false;
  208. Name = SymbolIterator->second.str();
  209. Addr = SymbolIterator->first.Addr;
  210. Size = SymbolIterator->first.Size;
  211. return true;
  212. }
  213. bool SymbolizableObjectFile::shouldOverrideWithSymbolTable(
  214. FunctionNameKind FNKind, bool UseSymbolTable) const {
  215. // When DWARF is used with -gline-tables-only / -gmlt, the symbol table gives
  216. // better answers for linkage names than the DIContext. Otherwise, we are
  217. // probably using PEs and PDBs, and we shouldn't do the override. PE files
  218. // generally only contain the names of exported symbols.
  219. return FNKind == FunctionNameKind::LinkageName && UseSymbolTable &&
  220. isa<DWARFContext>(DebugInfoContext.get());
  221. }
  222. DILineInfo
  223. SymbolizableObjectFile::symbolizeCode(object::SectionedAddress ModuleOffset,
  224. FunctionNameKind FNKind,
  225. bool UseSymbolTable) const {
  226. DILineInfo LineInfo;
  227. if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection)
  228. ModuleOffset.SectionIndex =
  229. getModuleSectionIndexForAddress(ModuleOffset.Address);
  230. if (DebugInfoContext) {
  231. LineInfo = DebugInfoContext->getLineInfoForAddress(
  232. ModuleOffset, getDILineInfoSpecifier(FNKind));
  233. }
  234. // Override function name from symbol table if necessary.
  235. if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) {
  236. std::string FunctionName;
  237. uint64_t Start, Size;
  238. if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address,
  239. FunctionName, Start, Size)) {
  240. LineInfo.FunctionName = FunctionName;
  241. }
  242. }
  243. return LineInfo;
  244. }
  245. DIInliningInfo SymbolizableObjectFile::symbolizeInlinedCode(
  246. object::SectionedAddress ModuleOffset, FunctionNameKind FNKind,
  247. bool UseSymbolTable) const {
  248. DIInliningInfo InlinedContext;
  249. if (ModuleOffset.SectionIndex == object::SectionedAddress::UndefSection)
  250. ModuleOffset.SectionIndex =
  251. getModuleSectionIndexForAddress(ModuleOffset.Address);
  252. if (DebugInfoContext)
  253. InlinedContext = DebugInfoContext->getInliningInfoForAddress(
  254. ModuleOffset, getDILineInfoSpecifier(FNKind));
  255. // Make sure there is at least one frame in context.
  256. if (InlinedContext.getNumberOfFrames() == 0)
  257. InlinedContext.addFrame(DILineInfo());
  258. // Override the function name in lower frame with name from symbol table.
  259. if (shouldOverrideWithSymbolTable(FNKind, UseSymbolTable)) {
  260. std::string FunctionName;
  261. uint64_t Start, Size;
  262. if (getNameFromSymbolTable(SymbolRef::ST_Function, ModuleOffset.Address,
  263. FunctionName, Start, Size)) {
  264. InlinedContext.getMutableFrame(InlinedContext.getNumberOfFrames() - 1)
  265. ->FunctionName = FunctionName;
  266. }
  267. }
  268. return InlinedContext;
  269. }
  270. DIGlobal SymbolizableObjectFile::symbolizeData(
  271. object::SectionedAddress ModuleOffset) const {
  272. DIGlobal Res;
  273. getNameFromSymbolTable(SymbolRef::ST_Data, ModuleOffset.Address, Res.Name,
  274. Res.Start, Res.Size);
  275. return Res;
  276. }
  277. /// Search for the first occurence of specified Address in ObjectFile.
  278. uint64_t SymbolizableObjectFile::getModuleSectionIndexForAddress(
  279. uint64_t Address) const {
  280. for (SectionRef Sec : Module->sections()) {
  281. if (!Sec.isText() || Sec.isVirtual())
  282. continue;
  283. if (Address >= Sec.getAddress() &&
  284. Address < Sec.getAddress() + Sec.getSize())
  285. return Sec.getIndex();
  286. }
  287. return object::SectionedAddress::UndefSection;
  288. }