123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557 |
- //===- InputFiles.cpp -----------------------------------------------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- #include "InputFiles.h"
- #include "Config.h"
- #include "InputChunks.h"
- #include "InputEvent.h"
- #include "InputGlobal.h"
- #include "SymbolTable.h"
- #include "lld/Common/ErrorHandler.h"
- #include "lld/Common/Memory.h"
- #include "lld/Common/Reproduce.h"
- #include "llvm/Object/Binary.h"
- #include "llvm/Object/Wasm.h"
- #include "llvm/Support/TarWriter.h"
- #include "llvm/Support/raw_ostream.h"
- #define DEBUG_TYPE "lld"
- using namespace llvm;
- using namespace llvm::object;
- using namespace llvm::wasm;
- namespace lld {
- // Returns a string in the format of "foo.o" or "foo.a(bar.o)".
- std::string toString(const wasm::InputFile *file) {
- if (!file)
- return "<internal>";
- if (file->archiveName.empty())
- return file->getName();
- return (file->archiveName + "(" + file->getName() + ")").str();
- }
- namespace wasm {
- std::unique_ptr<llvm::TarWriter> tar;
- Optional<MemoryBufferRef> readFile(StringRef path) {
- log("Loading: " + path);
- auto mbOrErr = MemoryBuffer::getFile(path);
- if (auto ec = mbOrErr.getError()) {
- error("cannot open " + path + ": " + ec.message());
- return None;
- }
- std::unique_ptr<MemoryBuffer> &mb = *mbOrErr;
- MemoryBufferRef mbref = mb->getMemBufferRef();
- make<std::unique_ptr<MemoryBuffer>>(std::move(mb)); // take MB ownership
- if (tar)
- tar->append(relativeToRoot(path), mbref.getBuffer());
- return mbref;
- }
- InputFile *createObjectFile(MemoryBufferRef mb,
- StringRef archiveName) {
- file_magic magic = identify_magic(mb.getBuffer());
- if (magic == file_magic::wasm_object) {
- std::unique_ptr<Binary> bin =
- CHECK(createBinary(mb), mb.getBufferIdentifier());
- auto *obj = cast<WasmObjectFile>(bin.get());
- if (obj->isSharedObject())
- return make<SharedFile>(mb);
- return make<ObjFile>(mb, archiveName);
- }
- if (magic == file_magic::bitcode)
- return make<BitcodeFile>(mb, archiveName);
- fatal("unknown file type: " + mb.getBufferIdentifier());
- }
- void ObjFile::dumpInfo() const {
- log("info for: " + toString(this) +
- "\n Symbols : " + Twine(symbols.size()) +
- "\n Function Imports : " + Twine(wasmObj->getNumImportedFunctions()) +
- "\n Global Imports : " + Twine(wasmObj->getNumImportedGlobals()) +
- "\n Event Imports : " + Twine(wasmObj->getNumImportedEvents()));
- }
- // Relocations contain either symbol or type indices. This function takes a
- // relocation and returns relocated index (i.e. translates from the input
- // symbol/type space to the output symbol/type space).
- uint32_t ObjFile::calcNewIndex(const WasmRelocation &reloc) const {
- if (reloc.Type == R_WASM_TYPE_INDEX_LEB) {
- assert(typeIsUsed[reloc.Index]);
- return typeMap[reloc.Index];
- }
- const Symbol *sym = symbols[reloc.Index];
- if (auto *ss = dyn_cast<SectionSymbol>(sym))
- sym = ss->getOutputSectionSymbol();
- return sym->getOutputSymbolIndex();
- }
- // Relocations can contain addend for combined sections. This function takes a
- // relocation and returns updated addend by offset in the output section.
- uint32_t ObjFile::calcNewAddend(const WasmRelocation &reloc) const {
- switch (reloc.Type) {
- case R_WASM_MEMORY_ADDR_LEB:
- case R_WASM_MEMORY_ADDR_SLEB:
- case R_WASM_MEMORY_ADDR_REL_SLEB:
- case R_WASM_MEMORY_ADDR_I32:
- case R_WASM_FUNCTION_OFFSET_I32:
- return reloc.Addend;
- case R_WASM_SECTION_OFFSET_I32:
- return getSectionSymbol(reloc.Index)->section->outputOffset + reloc.Addend;
- default:
- llvm_unreachable("unexpected relocation type");
- }
- }
- // Calculate the value we expect to find at the relocation location.
- // This is used as a sanity check before applying a relocation to a given
- // location. It is useful for catching bugs in the compiler and linker.
- uint32_t ObjFile::calcExpectedValue(const WasmRelocation &reloc) const {
- switch (reloc.Type) {
- case R_WASM_TABLE_INDEX_I32:
- case R_WASM_TABLE_INDEX_SLEB:
- case R_WASM_TABLE_INDEX_REL_SLEB: {
- const WasmSymbol &sym = wasmObj->syms()[reloc.Index];
- return tableEntries[sym.Info.ElementIndex];
- }
- case R_WASM_MEMORY_ADDR_SLEB:
- case R_WASM_MEMORY_ADDR_I32:
- case R_WASM_MEMORY_ADDR_LEB:
- case R_WASM_MEMORY_ADDR_REL_SLEB: {
- const WasmSymbol &sym = wasmObj->syms()[reloc.Index];
- if (sym.isUndefined())
- return 0;
- const WasmSegment &segment =
- wasmObj->dataSegments()[sym.Info.DataRef.Segment];
- return segment.Data.Offset.Value.Int32 + sym.Info.DataRef.Offset +
- reloc.Addend;
- }
- case R_WASM_FUNCTION_OFFSET_I32: {
- const WasmSymbol &sym = wasmObj->syms()[reloc.Index];
- InputFunction *f =
- functions[sym.Info.ElementIndex - wasmObj->getNumImportedFunctions()];
- return f->getFunctionInputOffset() + f->getFunctionCodeOffset() +
- reloc.Addend;
- }
- case R_WASM_SECTION_OFFSET_I32:
- return reloc.Addend;
- case R_WASM_TYPE_INDEX_LEB:
- return reloc.Index;
- case R_WASM_FUNCTION_INDEX_LEB:
- case R_WASM_GLOBAL_INDEX_LEB:
- case R_WASM_EVENT_INDEX_LEB: {
- const WasmSymbol &sym = wasmObj->syms()[reloc.Index];
- return sym.Info.ElementIndex;
- }
- default:
- llvm_unreachable("unknown relocation type");
- }
- }
- // Translate from the relocation's index into the final linked output value.
- uint32_t ObjFile::calcNewValue(const WasmRelocation &reloc) const {
- const Symbol* sym = nullptr;
- if (reloc.Type != R_WASM_TYPE_INDEX_LEB) {
- sym = symbols[reloc.Index];
- // We can end up with relocations against non-live symbols. For example
- // in debug sections.
- if ((isa<FunctionSymbol>(sym) || isa<DataSymbol>(sym)) && !sym->isLive())
- return 0;
- }
- switch (reloc.Type) {
- case R_WASM_TABLE_INDEX_I32:
- case R_WASM_TABLE_INDEX_SLEB:
- case R_WASM_TABLE_INDEX_REL_SLEB: {
- if (!getFunctionSymbol(reloc.Index)->hasTableIndex())
- return 0;
- uint32_t index = getFunctionSymbol(reloc.Index)->getTableIndex();
- if (reloc.Type == R_WASM_TABLE_INDEX_REL_SLEB)
- index -= config->tableBase;
- return index;
- }
- case R_WASM_MEMORY_ADDR_SLEB:
- case R_WASM_MEMORY_ADDR_I32:
- case R_WASM_MEMORY_ADDR_LEB:
- case R_WASM_MEMORY_ADDR_REL_SLEB:
- if (isa<UndefinedData>(sym) || sym->isUndefWeak())
- return 0;
- return cast<DefinedData>(sym)->getVirtualAddress() + reloc.Addend;
- case R_WASM_TYPE_INDEX_LEB:
- return typeMap[reloc.Index];
- case R_WASM_FUNCTION_INDEX_LEB:
- return getFunctionSymbol(reloc.Index)->getFunctionIndex();
- case R_WASM_GLOBAL_INDEX_LEB:
- if (auto gs = dyn_cast<GlobalSymbol>(sym))
- return gs->getGlobalIndex();
- return sym->getGOTIndex();
- case R_WASM_EVENT_INDEX_LEB:
- return getEventSymbol(reloc.Index)->getEventIndex();
- case R_WASM_FUNCTION_OFFSET_I32: {
- auto *f = cast<DefinedFunction>(sym);
- return f->function->outputOffset + f->function->getFunctionCodeOffset() +
- reloc.Addend;
- }
- case R_WASM_SECTION_OFFSET_I32:
- return getSectionSymbol(reloc.Index)->section->outputOffset + reloc.Addend;
- default:
- llvm_unreachable("unknown relocation type");
- }
- }
- template <class T>
- static void setRelocs(const std::vector<T *> &chunks,
- const WasmSection *section) {
- if (!section)
- return;
- ArrayRef<WasmRelocation> relocs = section->Relocations;
- assert(std::is_sorted(relocs.begin(), relocs.end(),
- [](const WasmRelocation &r1, const WasmRelocation &r2) {
- return r1.Offset < r2.Offset;
- }));
- assert(std::is_sorted(
- chunks.begin(), chunks.end(), [](InputChunk *c1, InputChunk *c2) {
- return c1->getInputSectionOffset() < c2->getInputSectionOffset();
- }));
- auto relocsNext = relocs.begin();
- auto relocsEnd = relocs.end();
- auto relocLess = [](const WasmRelocation &r, uint32_t val) {
- return r.Offset < val;
- };
- for (InputChunk *c : chunks) {
- auto relocsStart = std::lower_bound(relocsNext, relocsEnd,
- c->getInputSectionOffset(), relocLess);
- relocsNext = std::lower_bound(
- relocsStart, relocsEnd, c->getInputSectionOffset() + c->getInputSize(),
- relocLess);
- c->setRelocations(ArrayRef<WasmRelocation>(relocsStart, relocsNext));
- }
- }
- void ObjFile::parse(bool ignoreComdats) {
- // Parse a memory buffer as a wasm file.
- LLVM_DEBUG(dbgs() << "Parsing object: " << toString(this) << "\n");
- std::unique_ptr<Binary> bin = CHECK(createBinary(mb), toString(this));
- auto *obj = dyn_cast<WasmObjectFile>(bin.get());
- if (!obj)
- fatal(toString(this) + ": not a wasm file");
- if (!obj->isRelocatableObject())
- fatal(toString(this) + ": not a relocatable wasm file");
- bin.release();
- wasmObj.reset(obj);
- // Build up a map of function indices to table indices for use when
- // verifying the existing table index relocations
- uint32_t totalFunctions =
- wasmObj->getNumImportedFunctions() + wasmObj->functions().size();
- tableEntries.resize(totalFunctions);
- for (const WasmElemSegment &seg : wasmObj->elements()) {
- if (seg.Offset.Opcode != WASM_OPCODE_I32_CONST)
- fatal(toString(this) + ": invalid table elements");
- uint32_t offset = seg.Offset.Value.Int32;
- for (uint32_t index = 0; index < seg.Functions.size(); index++) {
- uint32_t functionIndex = seg.Functions[index];
- tableEntries[functionIndex] = offset + index;
- }
- }
- uint32_t sectionIndex = 0;
- // Bool for each symbol, true if called directly. This allows us to implement
- // a weaker form of signature checking where undefined functions that are not
- // called directly (i.e. only address taken) don't have to match the defined
- // function's signature. We cannot do this for directly called functions
- // because those signatures are checked at validation times.
- // See https://bugs.llvm.org/show_bug.cgi?id=40412
- std::vector<bool> isCalledDirectly(wasmObj->getNumberOfSymbols(), false);
- for (const SectionRef &sec : wasmObj->sections()) {
- const WasmSection §ion = wasmObj->getWasmSection(sec);
- // Wasm objects can have at most one code and one data section.
- if (section.Type == WASM_SEC_CODE) {
- assert(!codeSection);
- codeSection = §ion;
- } else if (section.Type == WASM_SEC_DATA) {
- assert(!dataSection);
- dataSection = §ion;
- } else if (section.Type == WASM_SEC_CUSTOM) {
- customSections.emplace_back(make<InputSection>(section, this));
- customSections.back()->setRelocations(section.Relocations);
- customSectionsByIndex[sectionIndex] = customSections.back();
- }
- sectionIndex++;
- // Scans relocations to dermine determine if a function symbol is called
- // directly
- for (const WasmRelocation &reloc : section.Relocations)
- if (reloc.Type == R_WASM_FUNCTION_INDEX_LEB)
- isCalledDirectly[reloc.Index] = true;
- }
- typeMap.resize(getWasmObj()->types().size());
- typeIsUsed.resize(getWasmObj()->types().size(), false);
- ArrayRef<StringRef> comdats = wasmObj->linkingData().Comdats;
- for (StringRef comdat : comdats) {
- bool isNew = ignoreComdats || symtab->addComdat(comdat);
- keptComdats.push_back(isNew);
- }
- // Populate `Segments`.
- for (const WasmSegment &s : wasmObj->dataSegments()) {
- auto* seg = make<InputSegment>(s, this);
- seg->discarded = isExcludedByComdat(seg);
- segments.emplace_back(seg);
- }
- setRelocs(segments, dataSection);
- // Populate `Functions`.
- ArrayRef<WasmFunction> funcs = wasmObj->functions();
- ArrayRef<uint32_t> funcTypes = wasmObj->functionTypes();
- ArrayRef<WasmSignature> types = wasmObj->types();
- functions.reserve(funcs.size());
- for (size_t i = 0, e = funcs.size(); i != e; ++i) {
- auto* func = make<InputFunction>(types[funcTypes[i]], &funcs[i], this);
- func->discarded = isExcludedByComdat(func);
- functions.emplace_back(func);
- }
- setRelocs(functions, codeSection);
- // Populate `Globals`.
- for (const WasmGlobal &g : wasmObj->globals())
- globals.emplace_back(make<InputGlobal>(g, this));
- // Populate `Events`.
- for (const WasmEvent &e : wasmObj->events())
- events.emplace_back(make<InputEvent>(types[e.Type.SigIndex], e, this));
- // Populate `Symbols` based on the symbols in the object.
- symbols.reserve(wasmObj->getNumberOfSymbols());
- for (const SymbolRef &sym : wasmObj->symbols()) {
- const WasmSymbol &wasmSym = wasmObj->getWasmSymbol(sym.getRawDataRefImpl());
- if (wasmSym.isDefined()) {
- // createDefined may fail if the symbol is comdat excluded in which case
- // we fall back to creating an undefined symbol
- if (Symbol *d = createDefined(wasmSym)) {
- symbols.push_back(d);
- continue;
- }
- }
- size_t idx = symbols.size();
- symbols.push_back(createUndefined(wasmSym, isCalledDirectly[idx]));
- }
- }
- bool ObjFile::isExcludedByComdat(InputChunk *chunk) const {
- uint32_t c = chunk->getComdat();
- if (c == UINT32_MAX)
- return false;
- return !keptComdats[c];
- }
- FunctionSymbol *ObjFile::getFunctionSymbol(uint32_t index) const {
- return cast<FunctionSymbol>(symbols[index]);
- }
- GlobalSymbol *ObjFile::getGlobalSymbol(uint32_t index) const {
- return cast<GlobalSymbol>(symbols[index]);
- }
- EventSymbol *ObjFile::getEventSymbol(uint32_t index) const {
- return cast<EventSymbol>(symbols[index]);
- }
- SectionSymbol *ObjFile::getSectionSymbol(uint32_t index) const {
- return cast<SectionSymbol>(symbols[index]);
- }
- DataSymbol *ObjFile::getDataSymbol(uint32_t index) const {
- return cast<DataSymbol>(symbols[index]);
- }
- Symbol *ObjFile::createDefined(const WasmSymbol &sym) {
- StringRef name = sym.Info.Name;
- uint32_t flags = sym.Info.Flags;
- switch (sym.Info.Kind) {
- case WASM_SYMBOL_TYPE_FUNCTION: {
- InputFunction *func =
- functions[sym.Info.ElementIndex - wasmObj->getNumImportedFunctions()];
- if (sym.isBindingLocal())
- return make<DefinedFunction>(name, flags, this, func);
- if (func->discarded)
- return nullptr;
- return symtab->addDefinedFunction(name, flags, this, func);
- }
- case WASM_SYMBOL_TYPE_DATA: {
- InputSegment *seg = segments[sym.Info.DataRef.Segment];
- uint32_t offset = sym.Info.DataRef.Offset;
- uint32_t size = sym.Info.DataRef.Size;
- if (sym.isBindingLocal())
- return make<DefinedData>(name, flags, this, seg, offset, size);
- if (seg->discarded)
- return nullptr;
- return symtab->addDefinedData(name, flags, this, seg, offset, size);
- }
- case WASM_SYMBOL_TYPE_GLOBAL: {
- InputGlobal *global =
- globals[sym.Info.ElementIndex - wasmObj->getNumImportedGlobals()];
- if (sym.isBindingLocal())
- return make<DefinedGlobal>(name, flags, this, global);
- return symtab->addDefinedGlobal(name, flags, this, global);
- }
- case WASM_SYMBOL_TYPE_SECTION: {
- InputSection *section = customSectionsByIndex[sym.Info.ElementIndex];
- assert(sym.isBindingLocal());
- return make<SectionSymbol>(flags, section, this);
- }
- case WASM_SYMBOL_TYPE_EVENT: {
- InputEvent *event =
- events[sym.Info.ElementIndex - wasmObj->getNumImportedEvents()];
- if (sym.isBindingLocal())
- return make<DefinedEvent>(name, flags, this, event);
- return symtab->addDefinedEvent(name, flags, this, event);
- }
- }
- llvm_unreachable("unknown symbol kind");
- }
- Symbol *ObjFile::createUndefined(const WasmSymbol &sym, bool isCalledDirectly) {
- StringRef name = sym.Info.Name;
- uint32_t flags = sym.Info.Flags | WASM_SYMBOL_UNDEFINED;
- switch (sym.Info.Kind) {
- case WASM_SYMBOL_TYPE_FUNCTION:
- if (sym.isBindingLocal())
- return make<UndefinedFunction>(name, sym.Info.ImportName,
- sym.Info.ImportModule, flags, this,
- sym.Signature, isCalledDirectly);
- return symtab->addUndefinedFunction(name, sym.Info.ImportName,
- sym.Info.ImportModule, flags, this,
- sym.Signature, isCalledDirectly);
- case WASM_SYMBOL_TYPE_DATA:
- if (sym.isBindingLocal())
- return make<UndefinedData>(name, flags, this);
- return symtab->addUndefinedData(name, flags, this);
- case WASM_SYMBOL_TYPE_GLOBAL:
- if (sym.isBindingLocal())
- return make<UndefinedGlobal>(name, sym.Info.ImportName,
- sym.Info.ImportModule, flags, this,
- sym.GlobalType);
- return symtab->addUndefinedGlobal(name, sym.Info.ImportName,
- sym.Info.ImportModule, flags, this,
- sym.GlobalType);
- case WASM_SYMBOL_TYPE_SECTION:
- llvm_unreachable("section symbols cannot be undefined");
- }
- llvm_unreachable("unknown symbol kind");
- }
- void ArchiveFile::parse() {
- // Parse a MemoryBufferRef as an archive file.
- LLVM_DEBUG(dbgs() << "Parsing library: " << toString(this) << "\n");
- file = CHECK(Archive::create(mb), toString(this));
- // Read the symbol table to construct Lazy symbols.
- int count = 0;
- for (const Archive::Symbol &sym : file->symbols()) {
- symtab->addLazy(this, &sym);
- ++count;
- }
- LLVM_DEBUG(dbgs() << "Read " << count << " symbols\n");
- }
- void ArchiveFile::addMember(const Archive::Symbol *sym) {
- const Archive::Child &c =
- CHECK(sym->getMember(),
- "could not get the member for symbol " + sym->getName());
- // Don't try to load the same member twice (this can happen when members
- // mutually reference each other).
- if (!seen.insert(c.getChildOffset()).second)
- return;
- LLVM_DEBUG(dbgs() << "loading lazy: " << sym->getName() << "\n");
- LLVM_DEBUG(dbgs() << "from archive: " << toString(this) << "\n");
- MemoryBufferRef mb =
- CHECK(c.getMemoryBufferRef(),
- "could not get the buffer for the member defining symbol " +
- sym->getName());
- InputFile *obj = createObjectFile(mb, getName());
- symtab->addFile(obj);
- }
- static uint8_t mapVisibility(GlobalValue::VisibilityTypes gvVisibility) {
- switch (gvVisibility) {
- case GlobalValue::DefaultVisibility:
- return WASM_SYMBOL_VISIBILITY_DEFAULT;
- case GlobalValue::HiddenVisibility:
- case GlobalValue::ProtectedVisibility:
- return WASM_SYMBOL_VISIBILITY_HIDDEN;
- }
- llvm_unreachable("unknown visibility");
- }
- static Symbol *createBitcodeSymbol(const std::vector<bool> &keptComdats,
- const lto::InputFile::Symbol &objSym,
- BitcodeFile &f) {
- StringRef name = saver.save(objSym.getName());
- uint32_t flags = objSym.isWeak() ? WASM_SYMBOL_BINDING_WEAK : 0;
- flags |= mapVisibility(objSym.getVisibility());
- int c = objSym.getComdatIndex();
- bool excludedByComdat = c != -1 && !keptComdats[c];
- if (objSym.isUndefined() || excludedByComdat) {
- flags |= WASM_SYMBOL_UNDEFINED;
- if (objSym.isExecutable())
- return symtab->addUndefinedFunction(name, "", "", flags, &f, nullptr,
- true);
- return symtab->addUndefinedData(name, flags, &f);
- }
- if (objSym.isExecutable())
- return symtab->addDefinedFunction(name, flags, &f, nullptr);
- return symtab->addDefinedData(name, flags, &f, nullptr, 0, 0);
- }
- void BitcodeFile::parse() {
- obj = check(lto::InputFile::create(MemoryBufferRef(
- mb.getBuffer(), saver.save(archiveName + mb.getBufferIdentifier()))));
- Triple t(obj->getTargetTriple());
- if (t.getArch() != Triple::wasm32) {
- error(toString(mb.getBufferIdentifier()) + ": machine type must be wasm32");
- return;
- }
- std::vector<bool> keptComdats;
- for (StringRef s : obj->getComdatTable())
- keptComdats.push_back(symtab->addComdat(s));
- for (const lto::InputFile::Symbol &objSym : obj->symbols())
- symbols.push_back(createBitcodeSymbol(keptComdats, objSym, *this));
- }
- } // namespace wasm
- } // namespace lld
|