Archive.cpp 33 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976
  1. //===- Archive.cpp - ar File Format implementation --------------*- C++ -*-===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file defines the Archive class along with its member header, child,
  10. // and symbol helper classes.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "llvm/Object/Archive.h"
  14. #include "llvm/ADT/SmallString.h"
  15. #include "llvm/ADT/Twine.h"
  16. #include "llvm/Support/Endian.h"
  17. #include "llvm/Support/MemoryBuffer.h"
  18. #include "llvm/Support/Path.h"
  19. using namespace llvm;
  20. using namespace object;
  21. using namespace llvm::support::endian;
  /// Signature bytes that identify a regular (non-thin) archive.
  static char const *const Magic = "!<arch>\n";
  /// Signature bytes that identify a thin archive.
  static char const *const ThinMagic = "!<thin>\n";
  24. void Archive::anchor() { }
  25. static Error
  26. malformedError(Twine Msg) {
  27. std::string StringMsg = "truncated or malformed archive (" + Msg.str() + ")";
  28. return make_error<GenericBinaryError>(std::move(StringMsg),
  29. object_error::parse_failed);
  30. }
  31. ArchiveMemberHeader::ArchiveMemberHeader(const Archive *Parent,
  32. const char *RawHeaderPtr,
  33. uint64_t Size, Error *Err)
  34. : Parent(Parent),
  35. ArMemHdr(reinterpret_cast<const ArMemHdrType *>(RawHeaderPtr)) {
  36. if (RawHeaderPtr == nullptr)
  37. return;
  38. ErrorAsOutParameter ErrAsOutParam(Err);
  39. if (Size < sizeof(ArMemHdrType)) {
  40. if (Err) {
  41. std::string Msg("remaining size of archive too small for next archive "
  42. "member header ");
  43. Expected<StringRef> NameOrErr = getName(Size);
  44. if (!NameOrErr) {
  45. consumeError(NameOrErr.takeError());
  46. uint64_t Offset = RawHeaderPtr - Parent->getData().data();
  47. *Err = malformedError(Msg + "at offset " + Twine(Offset));
  48. } else
  49. *Err = malformedError(Msg + "for " + NameOrErr.get());
  50. }
  51. return;
  52. }
  53. if (ArMemHdr->Terminator[0] != '`' || ArMemHdr->Terminator[1] != '\n') {
  54. if (Err) {
  55. std::string Buf;
  56. raw_string_ostream OS(Buf);
  57. OS.write_escaped(llvm::StringRef(ArMemHdr->Terminator,
  58. sizeof(ArMemHdr->Terminator)));
  59. OS.flush();
  60. std::string Msg("terminator characters in archive member \"" + Buf +
  61. "\" not the correct \"`\\n\" values for the archive "
  62. "member header ");
  63. Expected<StringRef> NameOrErr = getName(Size);
  64. if (!NameOrErr) {
  65. consumeError(NameOrErr.takeError());
  66. uint64_t Offset = RawHeaderPtr - Parent->getData().data();
  67. *Err = malformedError(Msg + "at offset " + Twine(Offset));
  68. } else
  69. *Err = malformedError(Msg + "for " + NameOrErr.get());
  70. }
  71. return;
  72. }
  73. }
  74. // This gets the raw name from the ArMemHdr->Name field and checks that it is
  75. // valid for the kind of archive. If it is not valid it returns an Error.
  76. Expected<StringRef> ArchiveMemberHeader::getRawName() const {
  77. char EndCond;
  78. auto Kind = Parent->kind();
  79. if (Kind == Archive::K_BSD || Kind == Archive::K_DARWIN64) {
  80. if (ArMemHdr->Name[0] == ' ') {
  81. uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
  82. Parent->getData().data();
  83. return malformedError("name contains a leading space for archive member "
  84. "header at offset " + Twine(Offset));
  85. }
  86. EndCond = ' ';
  87. }
  88. else if (ArMemHdr->Name[0] == '/' || ArMemHdr->Name[0] == '#')
  89. EndCond = ' ';
  90. else
  91. EndCond = '/';
  92. llvm::StringRef::size_type end =
  93. llvm::StringRef(ArMemHdr->Name, sizeof(ArMemHdr->Name)).find(EndCond);
  94. if (end == llvm::StringRef::npos)
  95. end = sizeof(ArMemHdr->Name);
  96. assert(end <= sizeof(ArMemHdr->Name) && end > 0);
  97. // Don't include the EndCond if there is one.
  98. return llvm::StringRef(ArMemHdr->Name, end);
  99. }
  100. // This gets the name looking up long names. Size is the size of the archive
  101. // member including the header, so the size of any name following the header
  102. // is checked to make sure it does not overflow.
  103. Expected<StringRef> ArchiveMemberHeader::getName(uint64_t Size) const {
  104. // This can be called from the ArchiveMemberHeader constructor when the
  105. // archive header is truncated to produce an error message with the name.
  106. // Make sure the name field is not truncated.
  107. if (Size < offsetof(ArMemHdrType, Name) + sizeof(ArMemHdr->Name)) {
  108. uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
  109. Parent->getData().data();
  110. return malformedError("archive header truncated before the name field "
  111. "for archive member header at offset " +
  112. Twine(ArchiveOffset));
  113. }
  114. // The raw name itself can be invalid.
  115. Expected<StringRef> NameOrErr = getRawName();
  116. if (!NameOrErr)
  117. return NameOrErr.takeError();
  118. StringRef Name = NameOrErr.get();
  119. // Check if it's a special name.
  120. if (Name[0] == '/') {
  121. if (Name.size() == 1) // Linker member.
  122. return Name;
  123. if (Name.size() == 2 && Name[1] == '/') // String table.
  124. return Name;
  125. // It's a long name.
  126. // Get the string table offset.
  127. std::size_t StringOffset;
  128. if (Name.substr(1).rtrim(' ').getAsInteger(10, StringOffset)) {
  129. std::string Buf;
  130. raw_string_ostream OS(Buf);
  131. OS.write_escaped(Name.substr(1).rtrim(' '));
  132. OS.flush();
  133. uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
  134. Parent->getData().data();
  135. return malformedError("long name offset characters after the '/' are "
  136. "not all decimal numbers: '" + Buf + "' for "
  137. "archive member header at offset " +
  138. Twine(ArchiveOffset));
  139. }
  140. // Verify it.
  141. if (StringOffset >= Parent->getStringTable().size()) {
  142. uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
  143. Parent->getData().data();
  144. return malformedError("long name offset " + Twine(StringOffset) + " past "
  145. "the end of the string table for archive member "
  146. "header at offset " + Twine(ArchiveOffset));
  147. }
  148. const char *addr = Parent->getStringTable().begin() + StringOffset;
  149. // GNU long file names end with a "/\n".
  150. if (Parent->kind() == Archive::K_GNU ||
  151. Parent->kind() == Archive::K_MIPS64) {
  152. StringRef::size_type End = StringRef(addr).find('\n');
  153. return StringRef(addr, End - 1);
  154. }
  155. return addr;
  156. }
  157. if (Name.startswith("#1/")) {
  158. uint64_t NameLength;
  159. if (Name.substr(3).rtrim(' ').getAsInteger(10, NameLength)) {
  160. std::string Buf;
  161. raw_string_ostream OS(Buf);
  162. OS.write_escaped(Name.substr(3).rtrim(' '));
  163. OS.flush();
  164. uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
  165. Parent->getData().data();
  166. return malformedError("long name length characters after the #1/ are "
  167. "not all decimal numbers: '" + Buf + "' for "
  168. "archive member header at offset " +
  169. Twine(ArchiveOffset));
  170. }
  171. if (getSizeOf() + NameLength > Size) {
  172. uint64_t ArchiveOffset = reinterpret_cast<const char *>(ArMemHdr) -
  173. Parent->getData().data();
  174. return malformedError("long name length: " + Twine(NameLength) +
  175. " extends past the end of the member or archive "
  176. "for archive member header at offset " +
  177. Twine(ArchiveOffset));
  178. }
  179. return StringRef(reinterpret_cast<const char *>(ArMemHdr) + getSizeOf(),
  180. NameLength).rtrim('\0');
  181. }
  182. // It is not a long name so trim the blanks at the end of the name.
  183. if (Name[Name.size() - 1] != '/')
  184. return Name.rtrim(' ');
  185. // It's a simple name.
  186. return Name.drop_back(1);
  187. }
  188. Expected<uint32_t> ArchiveMemberHeader::getSize() const {
  189. uint32_t Ret;
  190. if (llvm::StringRef(ArMemHdr->Size,
  191. sizeof(ArMemHdr->Size)).rtrim(" ").getAsInteger(10, Ret)) {
  192. std::string Buf;
  193. raw_string_ostream OS(Buf);
  194. OS.write_escaped(llvm::StringRef(ArMemHdr->Size,
  195. sizeof(ArMemHdr->Size)).rtrim(" "));
  196. OS.flush();
  197. uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
  198. Parent->getData().data();
  199. return malformedError("characters in size field in archive header are not "
  200. "all decimal numbers: '" + Buf + "' for archive "
  201. "member header at offset " + Twine(Offset));
  202. }
  203. return Ret;
  204. }
  205. Expected<sys::fs::perms> ArchiveMemberHeader::getAccessMode() const {
  206. unsigned Ret;
  207. if (StringRef(ArMemHdr->AccessMode,
  208. sizeof(ArMemHdr->AccessMode)).rtrim(' ').getAsInteger(8, Ret)) {
  209. std::string Buf;
  210. raw_string_ostream OS(Buf);
  211. OS.write_escaped(llvm::StringRef(ArMemHdr->AccessMode,
  212. sizeof(ArMemHdr->AccessMode)).rtrim(" "));
  213. OS.flush();
  214. uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
  215. Parent->getData().data();
  216. return malformedError("characters in AccessMode field in archive header "
  217. "are not all decimal numbers: '" + Buf + "' for the "
  218. "archive member header at offset " + Twine(Offset));
  219. }
  220. return static_cast<sys::fs::perms>(Ret);
  221. }
  222. Expected<sys::TimePoint<std::chrono::seconds>>
  223. ArchiveMemberHeader::getLastModified() const {
  224. unsigned Seconds;
  225. if (StringRef(ArMemHdr->LastModified,
  226. sizeof(ArMemHdr->LastModified)).rtrim(' ')
  227. .getAsInteger(10, Seconds)) {
  228. std::string Buf;
  229. raw_string_ostream OS(Buf);
  230. OS.write_escaped(llvm::StringRef(ArMemHdr->LastModified,
  231. sizeof(ArMemHdr->LastModified)).rtrim(" "));
  232. OS.flush();
  233. uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
  234. Parent->getData().data();
  235. return malformedError("characters in LastModified field in archive header "
  236. "are not all decimal numbers: '" + Buf + "' for the "
  237. "archive member header at offset " + Twine(Offset));
  238. }
  239. return sys::toTimePoint(Seconds);
  240. }
  241. Expected<unsigned> ArchiveMemberHeader::getUID() const {
  242. unsigned Ret;
  243. StringRef User = StringRef(ArMemHdr->UID, sizeof(ArMemHdr->UID)).rtrim(' ');
  244. if (User.empty())
  245. return 0;
  246. if (User.getAsInteger(10, Ret)) {
  247. std::string Buf;
  248. raw_string_ostream OS(Buf);
  249. OS.write_escaped(User);
  250. OS.flush();
  251. uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
  252. Parent->getData().data();
  253. return malformedError("characters in UID field in archive header "
  254. "are not all decimal numbers: '" + Buf + "' for the "
  255. "archive member header at offset " + Twine(Offset));
  256. }
  257. return Ret;
  258. }
  259. Expected<unsigned> ArchiveMemberHeader::getGID() const {
  260. unsigned Ret;
  261. StringRef Group = StringRef(ArMemHdr->GID, sizeof(ArMemHdr->GID)).rtrim(' ');
  262. if (Group.empty())
  263. return 0;
  264. if (Group.getAsInteger(10, Ret)) {
  265. std::string Buf;
  266. raw_string_ostream OS(Buf);
  267. OS.write_escaped(Group);
  268. OS.flush();
  269. uint64_t Offset = reinterpret_cast<const char *>(ArMemHdr) -
  270. Parent->getData().data();
  271. return malformedError("characters in GID field in archive header "
  272. "are not all decimal numbers: '" + Buf + "' for the "
  273. "archive member header at offset " + Twine(Offset));
  274. }
  275. return Ret;
  276. }
  277. Archive::Child::Child(const Archive *Parent, StringRef Data,
  278. uint16_t StartOfFile)
  279. : Parent(Parent), Header(Parent, Data.data(), Data.size(), nullptr),
  280. Data(Data), StartOfFile(StartOfFile) {
  281. }
  282. Archive::Child::Child(const Archive *Parent, const char *Start, Error *Err)
  283. : Parent(Parent),
  284. Header(Parent, Start,
  285. Parent
  286. ? Parent->getData().size() - (Start - Parent->getData().data())
  287. : 0, Err) {
  288. if (!Start)
  289. return;
  290. // If we are pointed to real data, Start is not a nullptr, then there must be
  291. // a non-null Err pointer available to report malformed data on. Only in
  292. // the case sentinel value is being constructed is Err is permitted to be a
  293. // nullptr.
  294. assert(Err && "Err can't be nullptr if Start is not a nullptr");
  295. ErrorAsOutParameter ErrAsOutParam(Err);
  296. // If there was an error in the construction of the Header
  297. // then just return with the error now set.
  298. if (*Err)
  299. return;
  300. uint64_t Size = Header.getSizeOf();
  301. Data = StringRef(Start, Size);
  302. Expected<bool> isThinOrErr = isThinMember();
  303. if (!isThinOrErr) {
  304. *Err = isThinOrErr.takeError();
  305. return;
  306. }
  307. bool isThin = isThinOrErr.get();
  308. if (!isThin) {
  309. Expected<uint64_t> MemberSize = getRawSize();
  310. if (!MemberSize) {
  311. *Err = MemberSize.takeError();
  312. return;
  313. }
  314. Size += MemberSize.get();
  315. Data = StringRef(Start, Size);
  316. }
  317. // Setup StartOfFile and PaddingBytes.
  318. StartOfFile = Header.getSizeOf();
  319. // Don't include attached name.
  320. Expected<StringRef> NameOrErr = getRawName();
  321. if (!NameOrErr){
  322. *Err = NameOrErr.takeError();
  323. return;
  324. }
  325. StringRef Name = NameOrErr.get();
  326. if (Name.startswith("#1/")) {
  327. uint64_t NameSize;
  328. if (Name.substr(3).rtrim(' ').getAsInteger(10, NameSize)) {
  329. std::string Buf;
  330. raw_string_ostream OS(Buf);
  331. OS.write_escaped(Name.substr(3).rtrim(' '));
  332. OS.flush();
  333. uint64_t Offset = Start - Parent->getData().data();
  334. *Err = malformedError("long name length characters after the #1/ are "
  335. "not all decimal numbers: '" + Buf + "' for "
  336. "archive member header at offset " +
  337. Twine(Offset));
  338. return;
  339. }
  340. StartOfFile += NameSize;
  341. }
  342. }
  343. Expected<uint64_t> Archive::Child::getSize() const {
  344. if (Parent->IsThin) {
  345. Expected<uint32_t> Size = Header.getSize();
  346. if (!Size)
  347. return Size.takeError();
  348. return Size.get();
  349. }
  350. return Data.size() - StartOfFile;
  351. }
  352. Expected<uint64_t> Archive::Child::getRawSize() const {
  353. return Header.getSize();
  354. }
  355. Expected<bool> Archive::Child::isThinMember() const {
  356. Expected<StringRef> NameOrErr = Header.getRawName();
  357. if (!NameOrErr)
  358. return NameOrErr.takeError();
  359. StringRef Name = NameOrErr.get();
  360. return Parent->IsThin && Name != "/" && Name != "//";
  361. }
  362. Expected<std::string> Archive::Child::getFullName() const {
  363. Expected<bool> isThin = isThinMember();
  364. if (!isThin)
  365. return isThin.takeError();
  366. assert(isThin.get());
  367. Expected<StringRef> NameOrErr = getName();
  368. if (!NameOrErr)
  369. return NameOrErr.takeError();
  370. StringRef Name = *NameOrErr;
  371. if (sys::path::is_absolute(Name))
  372. return Name;
  373. SmallString<128> FullName = sys::path::parent_path(
  374. Parent->getMemoryBufferRef().getBufferIdentifier());
  375. sys::path::append(FullName, Name);
  376. return StringRef(FullName);
  377. }
  378. Expected<StringRef> Archive::Child::getBuffer() const {
  379. Expected<bool> isThinOrErr = isThinMember();
  380. if (!isThinOrErr)
  381. return isThinOrErr.takeError();
  382. bool isThin = isThinOrErr.get();
  383. if (!isThin) {
  384. Expected<uint32_t> Size = getSize();
  385. if (!Size)
  386. return Size.takeError();
  387. return StringRef(Data.data() + StartOfFile, Size.get());
  388. }
  389. Expected<std::string> FullNameOrErr = getFullName();
  390. if (!FullNameOrErr)
  391. return FullNameOrErr.takeError();
  392. const std::string &FullName = *FullNameOrErr;
  393. ErrorOr<std::unique_ptr<MemoryBuffer>> Buf = MemoryBuffer::getFile(FullName);
  394. if (std::error_code EC = Buf.getError())
  395. return errorCodeToError(EC);
  396. Parent->ThinBuffers.push_back(std::move(*Buf));
  397. return Parent->ThinBuffers.back()->getBuffer();
  398. }
  399. Expected<Archive::Child> Archive::Child::getNext() const {
  400. size_t SpaceToSkip = Data.size();
  401. // If it's odd, add 1 to make it even.
  402. if (SpaceToSkip & 1)
  403. ++SpaceToSkip;
  404. const char *NextLoc = Data.data() + SpaceToSkip;
  405. // Check to see if this is at the end of the archive.
  406. if (NextLoc == Parent->Data.getBufferEnd())
  407. return Child(nullptr, nullptr, nullptr);
  408. // Check to see if this is past the end of the archive.
  409. if (NextLoc > Parent->Data.getBufferEnd()) {
  410. std::string Msg("offset to next archive member past the end of the archive "
  411. "after member ");
  412. Expected<StringRef> NameOrErr = getName();
  413. if (!NameOrErr) {
  414. consumeError(NameOrErr.takeError());
  415. uint64_t Offset = Data.data() - Parent->getData().data();
  416. return malformedError(Msg + "at offset " + Twine(Offset));
  417. } else
  418. return malformedError(Msg + NameOrErr.get());
  419. }
  420. Error Err = Error::success();
  421. Child Ret(Parent, NextLoc, &Err);
  422. if (Err)
  423. return std::move(Err);
  424. return Ret;
  425. }
  426. uint64_t Archive::Child::getChildOffset() const {
  427. const char *a = Parent->Data.getBuffer().data();
  428. const char *c = Data.data();
  429. uint64_t offset = c - a;
  430. return offset;
  431. }
  432. Expected<StringRef> Archive::Child::getName() const {
  433. Expected<uint64_t> RawSizeOrErr = getRawSize();
  434. if (!RawSizeOrErr)
  435. return RawSizeOrErr.takeError();
  436. uint64_t RawSize = RawSizeOrErr.get();
  437. Expected<StringRef> NameOrErr = Header.getName(Header.getSizeOf() + RawSize);
  438. if (!NameOrErr)
  439. return NameOrErr.takeError();
  440. StringRef Name = NameOrErr.get();
  441. return Name;
  442. }
  443. Expected<MemoryBufferRef> Archive::Child::getMemoryBufferRef() const {
  444. Expected<StringRef> NameOrErr = getName();
  445. if (!NameOrErr)
  446. return NameOrErr.takeError();
  447. StringRef Name = NameOrErr.get();
  448. Expected<StringRef> Buf = getBuffer();
  449. if (!Buf)
  450. return Buf.takeError();
  451. return MemoryBufferRef(*Buf, Name);
  452. }
  453. Expected<std::unique_ptr<Binary>>
  454. Archive::Child::getAsBinary(LLVMContext *Context) const {
  455. Expected<MemoryBufferRef> BuffOrErr = getMemoryBufferRef();
  456. if (!BuffOrErr)
  457. return BuffOrErr.takeError();
  458. auto BinaryOrErr = createBinary(BuffOrErr.get(), Context);
  459. if (BinaryOrErr)
  460. return std::move(*BinaryOrErr);
  461. return BinaryOrErr.takeError();
  462. }
  463. Expected<std::unique_ptr<Archive>> Archive::create(MemoryBufferRef Source) {
  464. Error Err = Error::success();
  465. std::unique_ptr<Archive> Ret(new Archive(Source, Err));
  466. if (Err)
  467. return std::move(Err);
  468. return std::move(Ret);
  469. }
  470. void Archive::setFirstRegular(const Child &C) {
  471. FirstRegularData = C.Data;
  472. FirstRegularStartOfFile = C.StartOfFile;
  473. }
  474. Archive::Archive(MemoryBufferRef Source, Error &Err)
  475. : Binary(Binary::ID_Archive, Source) {
  476. ErrorAsOutParameter ErrAsOutParam(&Err);
  477. StringRef Buffer = Data.getBuffer();
  478. // Check for sufficient magic.
  479. if (Buffer.startswith(ThinMagic)) {
  480. IsThin = true;
  481. } else if (Buffer.startswith(Magic)) {
  482. IsThin = false;
  483. } else {
  484. Err = make_error<GenericBinaryError>("File too small to be an archive",
  485. object_error::invalid_file_type);
  486. return;
  487. }
  488. // Make sure Format is initialized before any call to
  489. // ArchiveMemberHeader::getName() is made. This could be a valid empty
  490. // archive which is the same in all formats. So claiming it to be gnu to is
  491. // fine if not totally correct before we look for a string table or table of
  492. // contents.
  493. Format = K_GNU;
  494. // Get the special members.
  495. child_iterator I = child_begin(Err, false);
  496. if (Err)
  497. return;
  498. child_iterator E = child_end();
  499. // See if this is a valid empty archive and if so return.
  500. if (I == E) {
  501. Err = Error::success();
  502. return;
  503. }
  504. const Child *C = &*I;
  505. auto Increment = [&]() {
  506. ++I;
  507. if (Err)
  508. return true;
  509. C = &*I;
  510. return false;
  511. };
  512. Expected<StringRef> NameOrErr = C->getRawName();
  513. if (!NameOrErr) {
  514. Err = NameOrErr.takeError();
  515. return;
  516. }
  517. StringRef Name = NameOrErr.get();
  518. // Below is the pattern that is used to figure out the archive format
  519. // GNU archive format
  520. // First member : / (may exist, if it exists, points to the symbol table )
  521. // Second member : // (may exist, if it exists, points to the string table)
  522. // Note : The string table is used if the filename exceeds 15 characters
  523. // BSD archive format
  524. // First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table)
  525. // There is no string table, if the filename exceeds 15 characters or has a
  526. // embedded space, the filename has #1/<size>, The size represents the size
  527. // of the filename that needs to be read after the archive header
  528. // COFF archive format
  529. // First member : /
  530. // Second member : / (provides a directory of symbols)
  531. // Third member : // (may exist, if it exists, contains the string table)
  532. // Note: Microsoft PE/COFF Spec 8.3 says that the third member is present
  533. // even if the string table is empty. However, lib.exe does not in fact
  534. // seem to create the third member if there's no member whose filename
  535. // exceeds 15 characters. So the third member is optional.
  536. if (Name == "__.SYMDEF" || Name == "__.SYMDEF_64") {
  537. if (Name == "__.SYMDEF")
  538. Format = K_BSD;
  539. else // Name == "__.SYMDEF_64"
  540. Format = K_DARWIN64;
  541. // We know that the symbol table is not an external file, but we still must
  542. // check any Expected<> return value.
  543. Expected<StringRef> BufOrErr = C->getBuffer();
  544. if (!BufOrErr) {
  545. Err = BufOrErr.takeError();
  546. return;
  547. }
  548. SymbolTable = BufOrErr.get();
  549. if (Increment())
  550. return;
  551. setFirstRegular(*C);
  552. Err = Error::success();
  553. return;
  554. }
  555. if (Name.startswith("#1/")) {
  556. Format = K_BSD;
  557. // We know this is BSD, so getName will work since there is no string table.
  558. Expected<StringRef> NameOrErr = C->getName();
  559. if (!NameOrErr) {
  560. Err = NameOrErr.takeError();
  561. return;
  562. }
  563. Name = NameOrErr.get();
  564. if (Name == "__.SYMDEF SORTED" || Name == "__.SYMDEF") {
  565. // We know that the symbol table is not an external file, but we still
  566. // must check any Expected<> return value.
  567. Expected<StringRef> BufOrErr = C->getBuffer();
  568. if (!BufOrErr) {
  569. Err = BufOrErr.takeError();
  570. return;
  571. }
  572. SymbolTable = BufOrErr.get();
  573. if (Increment())
  574. return;
  575. }
  576. else if (Name == "__.SYMDEF_64 SORTED" || Name == "__.SYMDEF_64") {
  577. Format = K_DARWIN64;
  578. // We know that the symbol table is not an external file, but we still
  579. // must check any Expected<> return value.
  580. Expected<StringRef> BufOrErr = C->getBuffer();
  581. if (!BufOrErr) {
  582. Err = BufOrErr.takeError();
  583. return;
  584. }
  585. SymbolTable = BufOrErr.get();
  586. if (Increment())
  587. return;
  588. }
  589. setFirstRegular(*C);
  590. return;
  591. }
  592. // MIPS 64-bit ELF archives use a special format of a symbol table.
  593. // This format is marked by `ar_name` field equals to "/SYM64/".
  594. // For detailed description see page 96 in the following document:
  595. // http://techpubs.sgi.com/library/manuals/4000/007-4658-001/pdf/007-4658-001.pdf
  596. bool has64SymTable = false;
  597. if (Name == "/" || Name == "/SYM64/") {
  598. // We know that the symbol table is not an external file, but we still
  599. // must check any Expected<> return value.
  600. Expected<StringRef> BufOrErr = C->getBuffer();
  601. if (!BufOrErr) {
  602. Err = BufOrErr.takeError();
  603. return;
  604. }
  605. SymbolTable = BufOrErr.get();
  606. if (Name == "/SYM64/")
  607. has64SymTable = true;
  608. if (Increment())
  609. return;
  610. if (I == E) {
  611. Err = Error::success();
  612. return;
  613. }
  614. Expected<StringRef> NameOrErr = C->getRawName();
  615. if (!NameOrErr) {
  616. Err = NameOrErr.takeError();
  617. return;
  618. }
  619. Name = NameOrErr.get();
  620. }
  621. if (Name == "//") {
  622. Format = has64SymTable ? K_MIPS64 : K_GNU;
  623. // The string table is never an external member, but we still
  624. // must check any Expected<> return value.
  625. Expected<StringRef> BufOrErr = C->getBuffer();
  626. if (!BufOrErr) {
  627. Err = BufOrErr.takeError();
  628. return;
  629. }
  630. StringTable = BufOrErr.get();
  631. if (Increment())
  632. return;
  633. setFirstRegular(*C);
  634. Err = Error::success();
  635. return;
  636. }
  637. if (Name[0] != '/') {
  638. Format = has64SymTable ? K_MIPS64 : K_GNU;
  639. setFirstRegular(*C);
  640. Err = Error::success();
  641. return;
  642. }
  643. if (Name != "/") {
  644. Err = errorCodeToError(object_error::parse_failed);
  645. return;
  646. }
  647. Format = K_COFF;
  648. // We know that the symbol table is not an external file, but we still
  649. // must check any Expected<> return value.
  650. Expected<StringRef> BufOrErr = C->getBuffer();
  651. if (!BufOrErr) {
  652. Err = BufOrErr.takeError();
  653. return;
  654. }
  655. SymbolTable = BufOrErr.get();
  656. if (Increment())
  657. return;
  658. if (I == E) {
  659. setFirstRegular(*C);
  660. Err = Error::success();
  661. return;
  662. }
  663. NameOrErr = C->getRawName();
  664. if (!NameOrErr) {
  665. Err = NameOrErr.takeError();
  666. return;
  667. }
  668. Name = NameOrErr.get();
  669. if (Name == "//") {
  670. // The string table is never an external member, but we still
  671. // must check any Expected<> return value.
  672. Expected<StringRef> BufOrErr = C->getBuffer();
  673. if (!BufOrErr) {
  674. Err = BufOrErr.takeError();
  675. return;
  676. }
  677. StringTable = BufOrErr.get();
  678. if (Increment())
  679. return;
  680. }
  681. setFirstRegular(*C);
  682. Err = Error::success();
  683. }
  684. Archive::child_iterator Archive::child_begin(Error &Err,
  685. bool SkipInternal) const {
  686. if (isEmpty())
  687. return child_end();
  688. if (SkipInternal)
  689. return child_iterator(Child(this, FirstRegularData,
  690. FirstRegularStartOfFile),
  691. &Err);
  692. const char *Loc = Data.getBufferStart() + strlen(Magic);
  693. Child C(this, Loc, &Err);
  694. if (Err)
  695. return child_end();
  696. return child_iterator(C, &Err);
  697. }
  698. Archive::child_iterator Archive::child_end() const {
  699. return child_iterator(Child(nullptr, nullptr, nullptr), nullptr);
  700. }
  701. StringRef Archive::Symbol::getName() const {
  702. return Parent->getSymbolTable().begin() + StringIndex;
  703. }
  704. Expected<Archive::Child> Archive::Symbol::getMember() const {
  705. const char *Buf = Parent->getSymbolTable().begin();
  706. const char *Offsets = Buf;
  707. if (Parent->kind() == K_MIPS64 || Parent->kind() == K_DARWIN64)
  708. Offsets += sizeof(uint64_t);
  709. else
  710. Offsets += sizeof(uint32_t);
  711. uint32_t Offset = 0;
  712. if (Parent->kind() == K_GNU) {
  713. Offset = read32be(Offsets + SymbolIndex * 4);
  714. } else if (Parent->kind() == K_MIPS64) {
  715. Offset = read64be(Offsets + SymbolIndex * 8);
  716. } else if (Parent->kind() == K_BSD) {
  717. // The SymbolIndex is an index into the ranlib structs that start at
  718. // Offsets (the first uint32_t is the number of bytes of the ranlib
  719. // structs). The ranlib structs are a pair of uint32_t's the first
  720. // being a string table offset and the second being the offset into
  721. // the archive of the member that defines the symbol. Which is what
  722. // is needed here.
  723. Offset = read32le(Offsets + SymbolIndex * 8 + 4);
  724. } else if (Parent->kind() == K_DARWIN64) {
  725. // The SymbolIndex is an index into the ranlib_64 structs that start at
  726. // Offsets (the first uint64_t is the number of bytes of the ranlib_64
  727. // structs). The ranlib_64 structs are a pair of uint64_t's the first
  728. // being a string table offset and the second being the offset into
  729. // the archive of the member that defines the symbol. Which is what
  730. // is needed here.
  731. Offset = read64le(Offsets + SymbolIndex * 16 + 8);
  732. } else {
  733. // Skip offsets.
  734. uint32_t MemberCount = read32le(Buf);
  735. Buf += MemberCount * 4 + 4;
  736. uint32_t SymbolCount = read32le(Buf);
  737. if (SymbolIndex >= SymbolCount)
  738. return errorCodeToError(object_error::parse_failed);
  739. // Skip SymbolCount to get to the indices table.
  740. const char *Indices = Buf + 4;
  741. // Get the index of the offset in the file member offset table for this
  742. // symbol.
  743. uint16_t OffsetIndex = read16le(Indices + SymbolIndex * 2);
  744. // Subtract 1 since OffsetIndex is 1 based.
  745. --OffsetIndex;
  746. if (OffsetIndex >= MemberCount)
  747. return errorCodeToError(object_error::parse_failed);
  748. Offset = read32le(Offsets + OffsetIndex * 4);
  749. }
  750. const char *Loc = Parent->getData().begin() + Offset;
  751. Error Err = Error::success();
  752. Child C(Parent, Loc, &Err);
  753. if (Err)
  754. return std::move(Err);
  755. return C;
  756. }
  757. Archive::Symbol Archive::Symbol::getNext() const {
  758. Symbol t(*this);
  759. if (Parent->kind() == K_BSD) {
  760. // t.StringIndex is an offset from the start of the __.SYMDEF or
  761. // "__.SYMDEF SORTED" member into the string table for the ranlib
  762. // struct indexed by t.SymbolIndex . To change t.StringIndex to the
  763. // offset in the string table for t.SymbolIndex+1 we subtract the
  764. // its offset from the start of the string table for t.SymbolIndex
  765. // and add the offset of the string table for t.SymbolIndex+1.
  766. // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
  767. // which is the number of bytes of ranlib structs that follow. The ranlib
  768. // structs are a pair of uint32_t's the first being a string table offset
  769. // and the second being the offset into the archive of the member that
  770. // define the symbol. After that the next uint32_t is the byte count of
  771. // the string table followed by the string table.
  772. const char *Buf = Parent->getSymbolTable().begin();
  773. uint32_t RanlibCount = 0;
  774. RanlibCount = read32le(Buf) / 8;
  775. // If t.SymbolIndex + 1 will be past the count of symbols (the RanlibCount)
  776. // don't change the t.StringIndex as we don't want to reference a ranlib
  777. // past RanlibCount.
  778. if (t.SymbolIndex + 1 < RanlibCount) {
  779. const char *Ranlibs = Buf + 4;
  780. uint32_t CurRanStrx = 0;
  781. uint32_t NextRanStrx = 0;
  782. CurRanStrx = read32le(Ranlibs + t.SymbolIndex * 8);
  783. NextRanStrx = read32le(Ranlibs + (t.SymbolIndex + 1) * 8);
  784. t.StringIndex -= CurRanStrx;
  785. t.StringIndex += NextRanStrx;
  786. }
  787. } else {
  788. // Go to one past next null.
  789. t.StringIndex = Parent->getSymbolTable().find('\0', t.StringIndex) + 1;
  790. }
  791. ++t.SymbolIndex;
  792. return t;
  793. }
  794. Archive::symbol_iterator Archive::symbol_begin() const {
  795. if (!hasSymbolTable())
  796. return symbol_iterator(Symbol(this, 0, 0));
  797. const char *buf = getSymbolTable().begin();
  798. if (kind() == K_GNU) {
  799. uint32_t symbol_count = 0;
  800. symbol_count = read32be(buf);
  801. buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t)));
  802. } else if (kind() == K_MIPS64) {
  803. uint64_t symbol_count = read64be(buf);
  804. buf += sizeof(uint64_t) + (symbol_count * (sizeof(uint64_t)));
  805. } else if (kind() == K_BSD) {
  806. // The __.SYMDEF or "__.SYMDEF SORTED" member starts with a uint32_t
  807. // which is the number of bytes of ranlib structs that follow. The ranlib
  808. // structs are a pair of uint32_t's the first being a string table offset
  809. // and the second being the offset into the archive of the member that
  810. // define the symbol. After that the next uint32_t is the byte count of
  811. // the string table followed by the string table.
  812. uint32_t ranlib_count = 0;
  813. ranlib_count = read32le(buf) / 8;
  814. const char *ranlibs = buf + 4;
  815. uint32_t ran_strx = 0;
  816. ran_strx = read32le(ranlibs);
  817. buf += sizeof(uint32_t) + (ranlib_count * (2 * (sizeof(uint32_t))));
  818. // Skip the byte count of the string table.
  819. buf += sizeof(uint32_t);
  820. buf += ran_strx;
  821. } else if (kind() == K_DARWIN64) {
  822. // The __.SYMDEF_64 or "__.SYMDEF_64 SORTED" member starts with a uint64_t
  823. // which is the number of bytes of ranlib_64 structs that follow. The
  824. // ranlib_64 structs are a pair of uint64_t's the first being a string
  825. // table offset and the second being the offset into the archive of the
  826. // member that define the symbol. After that the next uint64_t is the byte
  827. // count of the string table followed by the string table.
  828. uint64_t ranlib_count = 0;
  829. ranlib_count = read64le(buf) / 16;
  830. const char *ranlibs = buf + 8;
  831. uint64_t ran_strx = 0;
  832. ran_strx = read64le(ranlibs);
  833. buf += sizeof(uint64_t) + (ranlib_count * (2 * (sizeof(uint64_t))));
  834. // Skip the byte count of the string table.
  835. buf += sizeof(uint64_t);
  836. buf += ran_strx;
  837. } else {
  838. uint32_t member_count = 0;
  839. uint32_t symbol_count = 0;
  840. member_count = read32le(buf);
  841. buf += 4 + (member_count * 4); // Skip offsets.
  842. symbol_count = read32le(buf);
  843. buf += 4 + (symbol_count * 2); // Skip indices.
  844. }
  845. uint32_t string_start_offset = buf - getSymbolTable().begin();
  846. return symbol_iterator(Symbol(this, 0, string_start_offset));
  847. }
  848. Archive::symbol_iterator Archive::symbol_end() const {
  849. return symbol_iterator(Symbol(this, getNumberOfSymbols(), 0));
  850. }
  851. uint32_t Archive::getNumberOfSymbols() const {
  852. if (!hasSymbolTable())
  853. return 0;
  854. const char *buf = getSymbolTable().begin();
  855. if (kind() == K_GNU)
  856. return read32be(buf);
  857. if (kind() == K_MIPS64)
  858. return read64be(buf);
  859. if (kind() == K_BSD)
  860. return read32le(buf) / 8;
  861. if (kind() == K_DARWIN64)
  862. return read64le(buf) / 16;
  863. uint32_t member_count = 0;
  864. member_count = read32le(buf);
  865. buf += 4 + (member_count * 4); // Skip offsets.
  866. return read32le(buf);
  867. }
  868. Expected<Optional<Archive::Child>> Archive::findSym(StringRef name) const {
  869. Archive::symbol_iterator bs = symbol_begin();
  870. Archive::symbol_iterator es = symbol_end();
  871. for (; bs != es; ++bs) {
  872. StringRef SymName = bs->getName();
  873. if (SymName == name) {
  874. if (auto MemberOrErr = bs->getMember())
  875. return Child(*MemberOrErr);
  876. else
  877. return MemberOrErr.takeError();
  878. }
  879. }
  880. return Optional<Child>();
  881. }
  882. // Returns true if archive file contains no member file.
  883. bool Archive::isEmpty() const { return Data.getBufferSize() == 8; }
  884. bool Archive::hasSymbolTable() const { return !SymbolTable.empty(); }