GsymCreator.cpp 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274
  1. //===- GsymCreator.cpp ----------------------------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //===----------------------------------------------------------------------===//
#include "llvm/DebugInfo/GSYM/GsymCreator.h"
#include "llvm/DebugInfo/GSYM/FileWriter.h"
#include "llvm/DebugInfo/GSYM/Header.h"
#include "llvm/DebugInfo/GSYM/LineTable.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Support/raw_ostream.h"

#include <algorithm>
#include <cassert>
#include <cstring>
#include <utility>
  15. using namespace llvm;
  16. using namespace gsym;
  17. GsymCreator::GsymCreator() : StrTab(StringTableBuilder::ELF) {
  18. insertFile(StringRef());
  19. }
  20. uint32_t GsymCreator::insertFile(StringRef Path,
  21. llvm::sys::path::Style Style) {
  22. llvm::StringRef directory = llvm::sys::path::parent_path(Path, Style);
  23. llvm::StringRef filename = llvm::sys::path::filename(Path, Style);
  24. FileEntry FE(insertString(directory), insertString(filename));
  25. std::lock_guard<std::recursive_mutex> Guard(Mutex);
  26. const auto NextIndex = Files.size();
  27. // Find FE in hash map and insert if not present.
  28. auto R = FileEntryToIndex.insert(std::make_pair(FE, NextIndex));
  29. if (R.second)
  30. Files.emplace_back(FE);
  31. return R.first->second;
  32. }
  33. llvm::Error GsymCreator::save(StringRef Path,
  34. llvm::support::endianness ByteOrder) const {
  35. std::error_code EC;
  36. raw_fd_ostream OutStrm(Path, EC);
  37. if (EC)
  38. return llvm::errorCodeToError(EC);
  39. FileWriter O(OutStrm, ByteOrder);
  40. return encode(O);
  41. }
  42. llvm::Error GsymCreator::encode(FileWriter &O) const {
  43. std::lock_guard<std::recursive_mutex> Guard(Mutex);
  44. if (Funcs.empty())
  45. return createStringError(std::errc::invalid_argument,
  46. "no functions to encode");
  47. if (!Finalized)
  48. return createStringError(std::errc::invalid_argument,
  49. "GsymCreator wasn't finalized prior to encoding");
  50. if (Funcs.size() > UINT32_MAX)
  51. return createStringError(std::errc::invalid_argument,
  52. "too many FunctionInfos");
  53. const uint64_t MinAddr = Funcs.front().startAddress();
  54. const uint64_t MaxAddr = Funcs.back().startAddress();
  55. const uint64_t AddrDelta = MaxAddr - MinAddr;
  56. Header Hdr;
  57. Hdr.Magic = GSYM_MAGIC;
  58. Hdr.Version = GSYM_VERSION;
  59. Hdr.AddrOffSize = 0;
  60. Hdr.UUIDSize = static_cast<uint8_t>(UUID.size());
  61. Hdr.BaseAddress = MinAddr;
  62. Hdr.NumAddresses = static_cast<uint32_t>(Funcs.size());
  63. Hdr.StrtabOffset = 0; // We will fix this up later.
  64. Hdr.StrtabOffset = 0; // We will fix this up later.
  65. bzero(Hdr.UUID, sizeof(Hdr.UUID));
  66. if (UUID.size() > sizeof(Hdr.UUID))
  67. return createStringError(std::errc::invalid_argument,
  68. "invalid UUID size %u", (uint32_t)UUID.size());
  69. // Set the address offset size correctly in the GSYM header.
  70. if (AddrDelta <= UINT8_MAX)
  71. Hdr.AddrOffSize = 1;
  72. else if (AddrDelta <= UINT16_MAX)
  73. Hdr.AddrOffSize = 2;
  74. else if (AddrDelta <= UINT32_MAX)
  75. Hdr.AddrOffSize = 4;
  76. else
  77. Hdr.AddrOffSize = 8;
  78. // Copy the UUID value if we have one.
  79. if (UUID.size() > 0)
  80. memcpy(Hdr.UUID, UUID.data(), UUID.size());
  81. // Write out the header.
  82. llvm::Error Err = Hdr.encode(O);
  83. if (Err)
  84. return Err;
  85. // Write out the address offsets.
  86. O.alignTo(Hdr.AddrOffSize);
  87. for (const auto &FuncInfo : Funcs) {
  88. uint64_t AddrOffset = FuncInfo.startAddress() - Hdr.BaseAddress;
  89. switch(Hdr.AddrOffSize) {
  90. case 1: O.writeU8(static_cast<uint8_t>(AddrOffset)); break;
  91. case 2: O.writeU16(static_cast<uint16_t>(AddrOffset)); break;
  92. case 4: O.writeU32(static_cast<uint32_t>(AddrOffset)); break;
  93. case 8: O.writeU64(AddrOffset); break;
  94. }
  95. }
  96. // Write out all zeros for the AddrInfoOffsets.
  97. O.alignTo(4);
  98. const off_t AddrInfoOffsetsOffset = O.tell();
  99. for (size_t i = 0, n = Funcs.size(); i < n; ++i)
  100. O.writeU32(0);
  101. // Write out the file table
  102. O.alignTo(4);
  103. assert(!Files.empty());
  104. assert(Files[0].Dir == 0);
  105. assert(Files[0].Base == 0);
  106. size_t NumFiles = Files.size();
  107. if (NumFiles > UINT32_MAX)
  108. return createStringError(std::errc::invalid_argument,
  109. "too many files");
  110. O.writeU32(static_cast<uint32_t>(NumFiles));
  111. for (auto File: Files) {
  112. O.writeU32(File.Dir);
  113. O.writeU32(File.Base);
  114. }
  115. // Write out the sting table.
  116. const off_t StrtabOffset = O.tell();
  117. StrTab.write(O.get_stream());
  118. const off_t StrtabSize = O.tell() - StrtabOffset;
  119. std::vector<uint32_t> AddrInfoOffsets;
  120. // Write out the address infos for each function info.
  121. for (const auto &FuncInfo : Funcs) {
  122. if (Expected<uint64_t> OffsetOrErr = FuncInfo.encode(O))
  123. AddrInfoOffsets.push_back(OffsetOrErr.get());
  124. else
  125. return OffsetOrErr.takeError();
  126. }
  127. // Fixup the string table offset and size in the header
  128. O.fixup32((uint32_t)StrtabOffset, offsetof(Header, StrtabOffset));
  129. O.fixup32((uint32_t)StrtabSize, offsetof(Header, StrtabSize));
  130. // Fixup all address info offsets
  131. uint64_t Offset = 0;
  132. for (auto AddrInfoOffset: AddrInfoOffsets) {
  133. O.fixup32(AddrInfoOffset, AddrInfoOffsetsOffset + Offset);
  134. Offset += 4;
  135. }
  136. return ErrorSuccess();
  137. }
  138. llvm::Error GsymCreator::finalize(llvm::raw_ostream &OS) {
  139. std::lock_guard<std::recursive_mutex> Guard(Mutex);
  140. if (Finalized)
  141. return createStringError(std::errc::invalid_argument,
  142. "already finalized");
  143. Finalized = true;
  144. // Sort function infos so we can emit sorted functions.
  145. llvm::sort(Funcs.begin(), Funcs.end());
  146. // Don't let the string table indexes change by finalizing in order.
  147. StrTab.finalizeInOrder();
  148. // Remove duplicates function infos that have both entries from debug info
  149. // (DWARF or Breakpad) and entries from the SymbolTable.
  150. //
  151. // Also handle overlapping function. Usually there shouldn't be any, but they
  152. // can and do happen in some rare cases.
  153. //
  154. // (a) (b) (c)
  155. // ^ ^ ^ ^
  156. // |X |Y |X ^ |X
  157. // | | | |Y | ^
  158. // | | | v v |Y
  159. // v v v v
  160. //
  161. // In (a) and (b), Y is ignored and X will be reported for the full range.
  162. // In (c), both functions will be included in the result and lookups for an
  163. // address in the intersection will return Y because of binary search.
  164. //
  165. // Note that in case of (b), we cannot include Y in the result because then
  166. // we wouldn't find any function for range (end of Y, end of X)
  167. // with binary search
  168. auto NumBefore = Funcs.size();
  169. auto Curr = Funcs.begin();
  170. auto Prev = Funcs.end();
  171. while (Curr != Funcs.end()) {
  172. // Can't check for overlaps or same address ranges if we don't have a
  173. // previous entry
  174. if (Prev != Funcs.end()) {
  175. if (Prev->Range.intersects(Curr->Range)) {
  176. // Overlapping address ranges.
  177. if (Prev->Range == Curr->Range) {
  178. // Same address range. Check if one is from debug info and the other
  179. // is from a symbol table. If so, then keep the one with debug info.
  180. // Our sorting guarantees that entries with matching address ranges
  181. // that have debug info are last in the sort.
  182. if (*Prev == *Curr) {
  183. // FunctionInfo entries match exactly (range, lines, inlines)
  184. OS << "warning: duplicate function info entries, removing "
  185. "duplicate:\n"
  186. << *Curr << '\n';
  187. Curr = Funcs.erase(Prev);
  188. } else {
  189. if (!Prev->hasRichInfo() && Curr->hasRichInfo()) {
  190. // Same address range, one with no debug info (symbol) and the
  191. // next with debug info. Keep the latter.
  192. Curr = Funcs.erase(Prev);
  193. } else {
  194. OS << "warning: same address range contains different debug "
  195. << "info. Removing:\n"
  196. << *Prev << "\nIn favor of this one:\n"
  197. << *Curr << "\n";
  198. Curr = Funcs.erase(Prev);
  199. }
  200. }
  201. } else {
  202. // print warnings about overlaps
  203. OS << "warning: function ranges overlap:\n"
  204. << *Prev << "\n"
  205. << *Curr << "\n";
  206. }
  207. } else if (Prev->Range.size() == 0 &&
  208. Curr->Range.contains(Prev->Range.Start)) {
  209. OS << "warning: removing symbol:\n"
  210. << *Prev << "\nKeeping:\n"
  211. << *Curr << "\n";
  212. Curr = Funcs.erase(Prev);
  213. }
  214. }
  215. if (Curr == Funcs.end())
  216. break;
  217. Prev = Curr++;
  218. }
  219. OS << "Pruned " << NumBefore - Funcs.size() << " functions, ended with "
  220. << Funcs.size() << " total\n";
  221. return Error::success();
  222. }
  223. uint32_t GsymCreator::insertString(StringRef S) {
  224. std::lock_guard<std::recursive_mutex> Guard(Mutex);
  225. if (S.empty())
  226. return 0;
  227. return StrTab.add(S);
  228. }
  229. void GsymCreator::addFunctionInfo(FunctionInfo &&FI) {
  230. std::lock_guard<std::recursive_mutex> Guard(Mutex);
  231. Funcs.emplace_back(FI);
  232. }
  233. void GsymCreator::forEachFunctionInfo(
  234. std::function<bool(FunctionInfo &)> const &Callback) {
  235. std::lock_guard<std::recursive_mutex> Guard(Mutex);
  236. for (auto &FI : Funcs) {
  237. if (!Callback(FI))
  238. break;
  239. }
  240. }
  241. void GsymCreator::forEachFunctionInfo(
  242. std::function<bool(const FunctionInfo &)> const &Callback) const {
  243. std::lock_guard<std::recursive_mutex> Guard(Mutex);
  244. for (const auto &FI : Funcs) {
  245. if (!Callback(FI))
  246. break;
  247. }
  248. }