123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313 |
- //===-- llvm-mc-assemble-fuzzer.cpp - Fuzzer for the MC layer -------------===//
- //
- // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
- // See https://llvm.org/LICENSE.txt for license information.
- // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
- //
- //===----------------------------------------------------------------------===//
- //
- //===----------------------------------------------------------------------===//
- #include "llvm-c/Target.h"
- #include "llvm/MC/SubtargetFeature.h"
- #include "llvm/MC/MCAsmBackend.h"
- #include "llvm/MC/MCAsmInfo.h"
- #include "llvm/MC/MCCodeEmitter.h"
- #include "llvm/MC/MCContext.h"
- #include "llvm/MC/MCInstPrinter.h"
- #include "llvm/MC/MCInstrInfo.h"
- #include "llvm/MC/MCObjectFileInfo.h"
- #include "llvm/MC/MCObjectWriter.h"
- #include "llvm/MC/MCParser/AsmLexer.h"
- #include "llvm/MC/MCParser/MCTargetAsmParser.h"
- #include "llvm/MC/MCRegisterInfo.h"
- #include "llvm/MC/MCSectionMachO.h"
- #include "llvm/MC/MCStreamer.h"
- #include "llvm/MC/MCSubtargetInfo.h"
- #include "llvm/MC/MCTargetOptionsCommandFlags.inc"
- #include "llvm/Support/MemoryBuffer.h"
- #include "llvm/Support/CommandLine.h"
- #include "llvm/Support/FileUtilities.h"
- #include "llvm/Support/raw_ostream.h"
- #include "llvm/Support/SourceMgr.h"
- #include "llvm/Support/TargetSelect.h"
- #include "llvm/Support/TargetRegistry.h"
- #include "llvm/Support/ToolOutputFile.h"
- using namespace llvm;
- static cl::opt<std::string>
- TripleName("triple", cl::desc("Target triple to assemble for, "
- "see -version for available targets"));
- static cl::opt<std::string>
- MCPU("mcpu",
- cl::desc("Target a specific cpu type (-mcpu=help for details)"),
- cl::value_desc("cpu-name"), cl::init(""));
- // This is useful for variable-length instruction sets.
- static cl::opt<unsigned> InsnLimit(
- "insn-limit",
- cl::desc("Limit the number of instructions to process (0 for no limit)"),
- cl::value_desc("count"), cl::init(0));
- static cl::list<std::string>
- MAttrs("mattr", cl::CommaSeparated,
- cl::desc("Target specific attributes (-mattr=help for details)"),
- cl::value_desc("a1,+a2,-a3,..."));
- // The feature string derived from -mattr's values.
- std::string FeaturesStr;
- static cl::list<std::string>
- FuzzerArgs("fuzzer-args", cl::Positional,
- cl::desc("Options to pass to the fuzzer"), cl::ZeroOrMore,
- cl::PositionalEatsArgs);
- static std::vector<char *> ModifiedArgv;
- enum OutputFileType {
- OFT_Null,
- OFT_AssemblyFile,
- OFT_ObjectFile
- };
- static cl::opt<OutputFileType>
- FileType("filetype", cl::init(OFT_AssemblyFile),
- cl::desc("Choose an output file type:"),
- cl::values(
- clEnumValN(OFT_AssemblyFile, "asm",
- "Emit an assembly ('.s') file"),
- clEnumValN(OFT_Null, "null",
- "Don't emit anything (for timing purposes)"),
- clEnumValN(OFT_ObjectFile, "obj",
- "Emit a native object ('.o') file")));
- class LLVMFuzzerInputBuffer : public MemoryBuffer
- {
- public:
- LLVMFuzzerInputBuffer(const uint8_t *data_, size_t size_)
- : Data(reinterpret_cast<const char *>(data_)),
- Size(size_) {
- init(Data, Data+Size, false);
- }
- virtual BufferKind getBufferKind() const {
- return MemoryBuffer_Malloc; // it's not disk-backed so I think that's
- // the intent ... though AFAIK it
- // probably came from an mmap or sbrk
- }
- private:
- const char *Data;
- size_t Size;
- };
- static int AssembleInput(const char *ProgName, const Target *TheTarget,
- SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str,
- MCAsmInfo &MAI, MCSubtargetInfo &STI,
- MCInstrInfo &MCII, MCTargetOptions &MCOptions) {
- static const bool NoInitialTextSection = false;
- std::unique_ptr<MCAsmParser> Parser(
- createMCAsmParser(SrcMgr, Ctx, Str, MAI));
- std::unique_ptr<MCTargetAsmParser> TAP(
- TheTarget->createMCAsmParser(STI, *Parser, MCII, MCOptions));
- if (!TAP) {
- errs() << ProgName
- << ": error: this target '" << TripleName
- << "', does not support assembly parsing.\n";
- abort();
- }
- Parser->setTargetParser(*TAP);
- return Parser->Run(NoInitialTextSection);
- }
- int AssembleOneInput(const uint8_t *Data, size_t Size) {
- const bool ShowInst = false;
- const bool AsmVerbose = false;
- const bool UseDwarfDirectory = true;
- Triple TheTriple(Triple::normalize(TripleName));
- SourceMgr SrcMgr;
- std::unique_ptr<MemoryBuffer> BufferPtr(new LLVMFuzzerInputBuffer(Data, Size));
- // Tell SrcMgr about this buffer, which is what the parser will pick up.
- SrcMgr.AddNewSourceBuffer(std::move(BufferPtr), SMLoc());
- static const std::vector<std::string> NoIncludeDirs;
- SrcMgr.setIncludeDirs(NoIncludeDirs);
- static std::string ArchName;
- std::string Error;
- const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple,
- Error);
- if (!TheTarget) {
- errs() << "error: this target '" << TheTriple.normalize()
- << "/" << ArchName << "', was not found: '" << Error << "'\n";
- abort();
- }
- std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName));
- if (!MRI) {
- errs() << "Unable to create target register info!";
- abort();
- }
- std::unique_ptr<MCAsmInfo> MAI(TheTarget->createMCAsmInfo(*MRI, TripleName));
- if (!MAI) {
- errs() << "Unable to create target asm info!";
- abort();
- }
- MCObjectFileInfo MOFI;
- MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr);
- static const bool UsePIC = false;
- MOFI.InitMCObjectFileInfo(TheTriple, UsePIC, Ctx);
- const unsigned OutputAsmVariant = 0;
- std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
- MCInstPrinter *IP = TheTarget->createMCInstPrinter(Triple(TripleName), OutputAsmVariant,
- *MAI, *MCII, *MRI);
- if (!IP) {
- errs()
- << "error: unable to create instruction printer for target triple '"
- << TheTriple.normalize() << "' with assembly variant "
- << OutputAsmVariant << ".\n";
- abort();
- }
- const char *ProgName = "llvm-mc-fuzzer";
- std::unique_ptr<MCSubtargetInfo> STI(
- TheTarget->createMCSubtargetInfo(TripleName, MCPU, FeaturesStr));
- std::unique_ptr<MCCodeEmitter> CE = nullptr;
- std::unique_ptr<MCAsmBackend> MAB = nullptr;
- MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags();
- std::string OutputString;
- raw_string_ostream Out(OutputString);
- auto FOut = llvm::make_unique<formatted_raw_ostream>(Out);
- std::unique_ptr<MCStreamer> Str;
- if (FileType == OFT_AssemblyFile) {
- Str.reset(TheTarget->createAsmStreamer(Ctx, std::move(FOut), AsmVerbose,
- UseDwarfDirectory, IP, std::move(CE),
- std::move(MAB), ShowInst));
- } else {
- assert(FileType == OFT_ObjectFile && "Invalid file type!");
- std::error_code EC;
- const std::string OutputFilename = "-";
- auto Out =
- llvm::make_unique<ToolOutputFile>(OutputFilename, EC, sys::fs::OF_None);
- if (EC) {
- errs() << EC.message() << '\n';
- abort();
- }
- // Don't waste memory on names of temp labels.
- Ctx.setUseNamesOnTempLabels(false);
- std::unique_ptr<buffer_ostream> BOS;
- raw_pwrite_stream *OS = &Out->os();
- if (!Out->os().supportsSeeking()) {
- BOS = make_unique<buffer_ostream>(Out->os());
- OS = BOS.get();
- }
- MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx);
- MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*STI, *MRI, MCOptions);
- Str.reset(TheTarget->createMCObjectStreamer(
- TheTriple, Ctx, std::unique_ptr<MCAsmBackend>(MAB),
- MAB->createObjectWriter(*OS), std::unique_ptr<MCCodeEmitter>(CE), *STI,
- MCOptions.MCRelaxAll, MCOptions.MCIncrementalLinkerCompatible,
- /*DWARFMustBeAtTheEnd*/ false));
- }
- const int Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, *MAI, *STI,
- *MCII, MCOptions);
- (void) Res;
- return 0;
- }
- extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
- return AssembleOneInput(Data, Size);
- }
- extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc,
- char ***argv) {
- // The command line is unusual compared to other fuzzers due to the need to
- // specify the target. Options like -triple, -mcpu, and -mattr work like
- // their counterparts in llvm-mc, while -fuzzer-args collects options for the
- // fuzzer itself.
- //
- // Examples:
- //
- // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to
- // 4-bytes each and use the contents of ./corpus as the test corpus:
- // llvm-mc-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 -disassemble \
- // -fuzzer-args -max_len=4 -runs=100000 ./corpus
- //
- // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA
- // feature enabled using up to 64-byte inputs:
- // llvm-mc-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 -mattr=msa \
- // -disassemble -fuzzer-args ./corpus
- //
- // If your aim is to find instructions that are not tested, then it is
- // advisable to constrain the maximum input size to a single instruction
- // using -max_len as in the first example. This results in a test corpus of
- // individual instructions that test unique paths. Without this constraint,
- // there will be considerable redundancy in the corpus.
- char **OriginalArgv = *argv;
- LLVMInitializeAllTargetInfos();
- LLVMInitializeAllTargetMCs();
- LLVMInitializeAllAsmParsers();
- cl::ParseCommandLineOptions(*argc, OriginalArgv);
- // Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that
- // the driver can parse its arguments.
- //
- // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs.
- // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a
- // non-const buffer to avoid the need to clean up when the fuzzer terminates.
- ModifiedArgv.push_back(OriginalArgv[0]);
- for (const auto &FuzzerArg : FuzzerArgs) {
- for (int i = 1; i < *argc; ++i) {
- if (FuzzerArg == OriginalArgv[i])
- ModifiedArgv.push_back(OriginalArgv[i]);
- }
- }
- *argc = ModifiedArgv.size();
- *argv = ModifiedArgv.data();
- // Package up features to be passed to target/subtarget
- // We have to pass it via a global since the callback doesn't
- // permit any user data.
- if (MAttrs.size()) {
- SubtargetFeatures Features;
- for (unsigned i = 0; i != MAttrs.size(); ++i)
- Features.AddFeature(MAttrs[i]);
- FeaturesStr = Features.getString();
- }
- if (TripleName.empty())
- TripleName = sys::getDefaultTargetTriple();
- return 0;
- }
|