Archive.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526
  1. //===- Archive.cpp - ar File Format implementation --------------*- C++ -*-===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file defines the ArchiveObjectFile class.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/Object/Archive.h"
  14. #include "llvm/ADT/APInt.h"
  15. #include "llvm/ADT/SmallString.h"
  16. #include "llvm/ADT/Twine.h"
  17. #include "llvm/Support/Endian.h"
  18. #include "llvm/Support/MemoryBuffer.h"
  19. using namespace llvm;
  20. using namespace object;
  21. using namespace llvm::support::endian;
  /// Signature that opens a regular archive; the Archive constructor treats a
  /// buffer starting with it as non-thin.
  static const char Magic[] = "!<arch>\n";
  /// Signature that opens a thin archive; the Archive constructor sets IsThin
  /// when the buffer starts with it.
  static const char ThinMagic[] = "!<thin>\n";
  24. void Archive::anchor() { }
  25. StringRef ArchiveMemberHeader::getName() const {
  26. char EndCond;
  27. if (Name[0] == '/' || Name[0] == '#')
  28. EndCond = ' ';
  29. else
  30. EndCond = '/';
  31. llvm::StringRef::size_type end =
  32. llvm::StringRef(Name, sizeof(Name)).find(EndCond);
  33. if (end == llvm::StringRef::npos)
  34. end = sizeof(Name);
  35. assert(end <= sizeof(Name) && end > 0);
  36. // Don't include the EndCond if there is one.
  37. return llvm::StringRef(Name, end);
  38. }
  39. uint32_t ArchiveMemberHeader::getSize() const {
  40. uint32_t Ret;
  41. if (llvm::StringRef(Size, sizeof(Size)).rtrim(" ").getAsInteger(10, Ret))
  42. llvm_unreachable("Size is not a decimal number.");
  43. return Ret;
  44. }
  45. sys::fs::perms ArchiveMemberHeader::getAccessMode() const {
  46. unsigned Ret;
  47. if (StringRef(AccessMode, sizeof(AccessMode)).rtrim(" ").getAsInteger(8, Ret))
  48. llvm_unreachable("Access mode is not an octal number.");
  49. return static_cast<sys::fs::perms>(Ret);
  50. }
  51. sys::TimeValue ArchiveMemberHeader::getLastModified() const {
  52. unsigned Seconds;
  53. if (StringRef(LastModified, sizeof(LastModified)).rtrim(" ")
  54. .getAsInteger(10, Seconds))
  55. llvm_unreachable("Last modified time not a decimal number.");
  56. sys::TimeValue Ret;
  57. Ret.fromEpochTime(Seconds);
  58. return Ret;
  59. }
  60. unsigned ArchiveMemberHeader::getUID() const {
  61. unsigned Ret;
  62. if (StringRef(UID, sizeof(UID)).rtrim(" ").getAsInteger(10, Ret))
  63. llvm_unreachable("UID time not a decimal number.");
  64. return Ret;
  65. }
  66. unsigned ArchiveMemberHeader::getGID() const {
  67. unsigned Ret;
  68. if (StringRef(GID, sizeof(GID)).rtrim(" ").getAsInteger(10, Ret))
  69. llvm_unreachable("GID time not a decimal number.");
  70. return Ret;
  71. }
  72. Archive::Child::Child(const Archive *Parent, const char *Start)
  73. : Parent(Parent) {
  74. if (!Start)
  75. return;
  76. const ArchiveMemberHeader *Header =
  77. reinterpret_cast<const ArchiveMemberHeader *>(Start);
  78. uint64_t Size = sizeof(ArchiveMemberHeader);
  79. if (!Parent->IsThin || Header->getName() == "/" || Header->getName() == "//")
  80. Size += Header->getSize();
  81. Data = StringRef(Start, Size);
  82. // Setup StartOfFile and PaddingBytes.
  83. StartOfFile = sizeof(ArchiveMemberHeader);
  84. // Don't include attached name.
  85. StringRef Name = Header->getName();
  86. if (Name.startswith("#1/")) {
  87. uint64_t NameSize;
  88. if (Name.substr(3).rtrim(" ").getAsInteger(10, NameSize))
  89. llvm_unreachable("Long name length is not an integer");
  90. StartOfFile += NameSize;
  91. }
  92. }
  93. uint64_t Archive::Child::getSize() const {
  94. if (Parent->IsThin)
  95. return getHeader()->getSize();
  96. return Data.size() - StartOfFile;
  97. }
  98. uint64_t Archive::Child::getRawSize() const {
  99. return getHeader()->getSize();
  100. }
  101. Archive::Child Archive::Child::getNext() const {
  102. size_t SpaceToSkip = Data.size();
  103. // If it's odd, add 1 to make it even.
  104. if (SpaceToSkip & 1)
  105. ++SpaceToSkip;
  106. const char *NextLoc = Data.data() + SpaceToSkip;
  107. // Check to see if this is past the end of the archive.
  108. if (NextLoc >= Parent->Data.getBufferEnd())
  109. return Child(Parent, nullptr);
  110. return Child(Parent, NextLoc);
  111. }
  112. uint64_t Archive::Child::getChildOffset() const {
  113. const char *a = Parent->Data.getBuffer().data();
  114. const char *c = Data.data();
  115. uint64_t offset = c - a;
  116. return offset;
  117. }
  118. ErrorOr<StringRef> Archive::Child::getName() const {
  119. StringRef name = getRawName();
  120. // Check if it's a special name.
  121. if (name[0] == '/') {
  122. if (name.size() == 1) // Linker member.
  123. return name;
  124. if (name.size() == 2 && name[1] == '/') // String table.
  125. return name;
  126. // It's a long name.
  127. // Get the offset.
  128. std::size_t offset;
  129. if (name.substr(1).rtrim(" ").getAsInteger(10, offset))
  130. llvm_unreachable("Long name offset is not an integer");
  131. const char *addr = Parent->StringTable->Data.begin()
  132. + sizeof(ArchiveMemberHeader)
  133. + offset;
  134. // Verify it.
  135. if (Parent->StringTable == Parent->child_end()
  136. || addr < (Parent->StringTable->Data.begin()
  137. + sizeof(ArchiveMemberHeader))
  138. || addr > (Parent->StringTable->Data.begin()
  139. + sizeof(ArchiveMemberHeader)
  140. + Parent->StringTable->getSize()))
  141. return object_error::parse_failed;
  142. // GNU long file names end with a /.
  143. if (Parent->kind() == K_GNU || Parent->kind() == K_MIPS64) {
  144. StringRef::size_type End = StringRef(addr).find('/');
  145. return StringRef(addr, End);
  146. }
  147. return StringRef(addr);
  148. } else if (name.startswith("#1/")) {
  149. uint64_t name_size;
  150. if (name.substr(3).rtrim(" ").getAsInteger(10, name_size))
  151. llvm_unreachable("Long name length is not an ingeter");
  152. return Data.substr(sizeof(ArchiveMemberHeader), name_size)
  153. .rtrim(StringRef("\0", 1));
  154. }
  155. // It's a simple name.
  156. if (name[name.size() - 1] == '/')
  157. return name.substr(0, name.size() - 1);
  158. return name;
  159. }
  160. ErrorOr<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const {
  161. ErrorOr<StringRef> NameOrErr = getName();
  162. if (std::error_code EC = NameOrErr.getError())
  163. return EC;
  164. StringRef Name = NameOrErr.get();
  165. return MemoryBufferRef(getBuffer(), Name);
  166. }
  167. ErrorOr<std::unique_ptr<Binary>>
  168. Archive::Child::getAsBinary(LLVMContext *Context) const {
  169. ErrorOr<MemoryBufferRef> BuffOrErr = getMemoryBufferRef();
  170. if (std::error_code EC = BuffOrErr.getError())
  171. return EC;
  172. return createBinary(BuffOrErr.get(), Context);
  173. }
  174. ErrorOr<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) {
  175. std::error_code EC;
  176. std::unique_ptr<Archive> Ret(new Archive(Source, EC));
  177. if (EC)
  178. return EC;
  179. return std::move(Ret);
  180. }
  181. Archive::Archive(MemoryBufferRef Source, std::error_code &ec)
  182. : Binary(Binary::ID_Archive, Source), SymbolTable(child_end()) {
  183. StringRef Buffer = Data.getBuffer();
  184. // Check for sufficient magic.
  185. if (Buffer.startswith(ThinMagic)) {
  186. IsThin = true;
  187. } else if (Buffer.startswith(Magic)) {
  188. IsThin = false;
  189. } else {
  190. ec = object_error::invalid_file_type;
  191. return;
  192. }
  193. // Get the special members.
  194. child_iterator i = child_begin(false);
  195. child_iterator e = child_end();
  196. if (i == e) {
  197. ec = std::error_code();
  198. return;
  199. }
  200. StringRef Name = i->getRawName();
  201. // Below is the pattern that is used to figure out the archive format
  202. // GNU archive format
  203. // First member : / (may exist, if it exists, points to the symbol table )
  204. // Second member : // (may exist, if it exists, points to the string table)
  205. // Note : The string table is used if the filename exceeds 15 characters
  206. // BSD archive format
  207. // First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table)
  208. // There is no string table, if the filename exceeds 15 characters or has a
  209. // embedded space, the filename has #1/<size>, The size represents the size
  210. // of the filename that needs to be read after the archive header
  211. // COFF archive format
  212. // First member : /
  213. // Second member : / (provides a directory of symbols)
  214. // Third member : // (may exist, if it exists, contains the string table)
  215. // Note: Microsoft PE/COFF Spec 8.3 says that the third member is present
  216. // even if the string table is empty. However, lib.exe does not in fact
  217. // seem to create the third member if there's no member whose filename
  218. // exceeds 15 characters. So the third member is optional.
  219. if (Name == "__.SYMDEF") {
  220. Format = K_BSD;
  221. SymbolTable = i;
  222. ++i;
  223. FirstRegular = i;
  224. ec = std::error_code();
  225. return;
  226. }
  227. if (Name.startswith("#1/")) {
  228. Format = K_BSD;
  229. // We know this is BSD, so getName will work since there is no string table.
  230. ErrorOr<StringRef> NameOrErr = i->getName();
  231. ec = NameOrErr.getError();
  232. if (ec)
  233. return;
  234. Name = NameOrErr.get();
  235. if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") {
  236. SymbolTable = i;
  237. ++i;
  238. }
  239. FirstRegular = i;
  240. return;
  241. }
  242. // MIPS 64-bit ELF archives use a special format of a symbol table.
  243. // This format is marked by `ar_name` field equals to "/SYM64/".
  244. // For detailed description see page 96 in the following document:
  245. // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf
  246. bool has64SymTable = false;
  247. if (Name == "/" || Name == "/SYM64/") {
  248. SymbolTable = i;
  249. if (Name == "/SYM64/")
  250. has64SymTable = true;
  251. ++i;
  252. if (i == e) {
  253. ec = object_error::parse_failed;
  254. return;
  255. }
  256. Name = i->getRawName();
  257. }
  258. if (Name == "//") {
  259. Format = has64SymTable ? K_MIPS64 : K_GNU;
  260. StringTable = i;
  261. ++i;
  262. FirstRegular = i;
  263. ec = std::error_code();
  264. return;
  265. }
  266. if (Name[0] != '/') {
  267. Format = has64SymTable ? K_MIPS64 : K_GNU;
  268. FirstRegular = i;
  269. ec = std::error_code();
  270. return;
  271. }
  272. if (Name != "/") {
  273. ec = object_error::parse_failed;
  274. return;
  275. }
  276. Format = K_COFF;
  277. SymbolTable = i;
  278. ++i;
  279. if (i == e) {
  280. FirstRegular = i;
  281. ec = std::error_code();
  282. return;
  283. }
  284. Name = i->getRawName();
  285. if (Name == "//") {
  286. StringTable = i;
  287. ++i;
  288. }
  289. FirstRegular = i;
  290. ec = std::error_code();
  291. }
  292. Archive::child_iterator Archive::child_begin(bool SkipInternal) const {
  293. if (Data.getBufferSize() == 8) // empty archive.
  294. return child_end();
  295. if (SkipInternal)
  296. return FirstRegular;
  297. const char *Loc = Data.getBufferStart() + strlen(Magic);
  298. Child c(this, Loc);
  299. return c;
  300. }
  301. Archive::child_iterator Archive::child_end() const {
  302. return Child(this, nullptr);
  303. }
  304. StringRef Archive::Symbol::getName() const {
  305. return Parent->SymbolTable->getBuffer().begin() + StringIndex;
  306. }
  307. ErrorOr<Archive::child_iterator> Archive::Symbol::getMember() const {
  308. const char *Buf = Parent->SymbolTable->getBuffer().begin();
  309. const char *Offsets = Buf;
  310. if (Parent->kind() == K_MIPS64)
  311. Offsets += sizeof(uint64_t);
  312. else
  313. Offsets += sizeof(uint32_t);
  314. uint32_t Offset = 0;
  315. if (Parent->kind() == K_GNU) {
  316. Offset = read32be(Offsets + SymbolIndex * 4);
  317. } else if (Parent->kind() == K_MIPS64) {
  318. Offset = read64be(Offsets + SymbolIndex * 8);
  319. } else if (Parent->kind() == K_BSD) {
  320. // The SymbolIndex is an index into the ranlib structs that start at
  321. // Offsets (the first uint32_t is the number of bytes of the ranlib
  322. // structs). The ranlib structs are a pair of uint32_t's the first
  323. // being a string table offset and the second being the offset into
  324. // the archive of the member that defines the symbol. Which is what
  325. // is needed here.
  326. Offset = read32le(Offsets + SymbolIndex * 8 + 4);
  327. } else {
  328. // Skip offsets.
  329. uint32_t MemberCount = read32le(Buf);
  330. Buf += MemberCount * 4 + 4;
  331. uint32_t SymbolCount = read32le(Buf);
  332. if (SymbolIndex >= SymbolCount)
  333. return object_error::parse_failed;
  334. // Skip SymbolCount to get to the indices table.
  335. const char *Indices = Buf + 4;
  336. // Get the index of the offset in the file member offset table for this
  337. // symbol.
  338. uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2);
  339. // Subtract 1 since OffsetIndex is 1 based.
  340. --OffsetIndex;
  341. if (OffsetIndex >= MemberCount)
  342. return object_error::parse_failed;
  343. Offset = read32le(Offsets + OffsetIndex * 4);
  344. }
  345. const char *Loc = Parent->getData().begin() + Offset;
  346. child_iterator Iter(Child(Parent, Loc));
  347. return Iter;
  348. }
  349. Archive::Symbol Archive::Symbol::getNext() const {
  350. Symbol t(*this);
  351. if (Parent->kind() == K_BSD) {
  352. // t.StringIndex is an offset from the start of the __.SYMDEF or
  353. // "__.SYMDEF SORTED" member into the string table for the ranlib
  354. // struct indexed by t.SymbolIndex . To change t.StringIndex to the
  355. // offset in the string table for t.SymbolIndex+1 we subtract the
  356. // its offset from the start of the string table for t.SymbolIndex
  357. // and add the offset of the string table for t.SymbolIndex+1.
  358. // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
  359. // which is the number of bytes of ranlib structs that follow. The ranlib
  360. // structs are a pair of uint32_t's the first being a string table offset
  361. // and the second being the offset into the archive of the member that
  362. // define the symbol. After that the next uint32_t is the byte count of
  363. // the string table followed by the string table.
  364. const char *Buf = Parent->SymbolTable->getBuffer().begin();
  365. uint32_t RanlibCount = 0;
  366. RanlibCount = read32le(Buf) / 8;
  367. // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount)
  368. // don't change the t.StringIndex as we don't want to reference a ranlib
  369. // past RanlibCount.
  370. if (t.SymbolIndex + 1 < RanlibCount) {
  371. const char *Ranlibs = Buf + 4;
  372. uint32_t CurRanStrx = 0;
  373. uint32_t NextRanStrx = 0;
  374. CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8);
  375. NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8);
  376. t.StringIndex -= CurRanStrx;
  377. t.StringIndex += NextRanStrx;
  378. }
  379. } else {
  380. // Go to one past next null.
  381. t.StringIndex =
  382. Parent->SymbolTable->getBuffer().find('\0', t.StringIndex) + 1;
  383. }
  384. ++t.SymbolIndex;
  385. return t;
  386. }
  387. Archive::symbol_iterator Archive::symbol_begin() const {
  388. if (!hasSymbolTable())
  389. return symbol_iterator(Symbol(this, 0, 0));
  390. const char *buf = SymbolTable->getBuffer().begin();
  391. if (kind() == K_GNU) {
  392. uint32_t symbol_count = 0;
  393. symbol_count = read32be(buf);
  394. buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
  395. } else if (kind() == K_MIPS64) {
  396. uint64_t symbol_count = read64be(buf);
  397. buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t)));
  398. } else if (kind() == K_BSD) {
  399. // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
  400. // which is the number of bytes of ranlib structs that follow. The ranlib
  401. // structs are a pair of uint32_t's the first being a string table offset
  402. // and the second being the offset into the archive of the member that
  403. // define the symbol. After that the next uint32_t is the byte count of
  404. // the string table followed by the string table.
  405. uint32_t ranlib_count = 0;
  406. ranlib_count = read32le(buf) / 8;
  407. const char *ranlibs = buf + 4;
  408. uint32_t ran_strx = 0;
  409. ran_strx = read32le(ranlibs);
  410. buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t))));
  411. // Skip the byte count of the string table.
  412. buf += sizeof(uint32_t);
  413. buf += ran_strx;
  414. } else {
  415. uint32_t member_count = 0;
  416. uint32_t symbol_count = 0;
  417. member_count = read32le(buf);
  418. buf += 4 + (member_count * 4); // Skip offsets.
  419. symbol_count = read32le(buf);
  420. buf += 4 + (symbol_count * 2); // Skip indices.
  421. }
  422. uint32_t string_start_offset = buf - SymbolTable->getBuffer().begin();
  423. return symbol_iterator(Symbol(this, 0, string_start_offset));
  424. }
  425. Archive::symbol_iterator Archive::symbol_end() const {
  426. if (!hasSymbolTable())
  427. return symbol_iterator(Symbol(this, 0, 0));
  428. return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0));
  429. }
  430. uint32_t Archive::getNumberOfSymbols() const {
  431. const char *buf = SymbolTable->getBuffer().begin();
  432. if (kind() == K_GNU)
  433. return read32be(buf);
  434. if (kind() == K_MIPS64)
  435. return read64be(buf);
  436. if (kind() == K_BSD)
  437. return read32le(buf) / 8;
  438. uint32_t member_count = 0;
  439. member_count = read32le(buf);
  440. buf += 4 + (member_count * 4); // Skip offsets.
  441. return read32le(buf);
  442. }
  443. Archive::child_iterator Archive::findSym(StringRef name) const {
  444. Archive::symbol_iterator bs = symbol_begin();
  445. Archive::symbol_iterator es = symbol_end();
  446. for (; bs != es; ++bs) {
  447. StringRef SymName = bs->getName();
  448. if (SymName == name) {
  449. ErrorOr<Archive::child_iterator> ResultOrErr = bs->getMember();
  450. // FIXME: Should we really eat the error?
  451. if (ResultOrErr.getError())
  452. return child_end();
  453. return ResultOrErr.get();
  454. }
  455. }
  456. return child_end();
  457. }
  458. bool Archive::hasSymbolTable() const {
  459. return SymbolTable != child_end();
  460. }