//===--- llvm-mc-assemble-fuzzer.cpp - Fuzzer for the MC layer ------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
//===----------------------------------------------------------------------===//
  11. #include "llvm-c/Target.h"
  12. #include "llvm/MC/SubtargetFeature.h"
  13. #include "llvm/MC/MCAsmBackend.h"
  14. #include "llvm/MC/MCAsmInfo.h"
  15. #include "llvm/MC/MCContext.h"
  16. #include "llvm/MC/MCInstPrinter.h"
  17. #include "llvm/MC/MCInstrInfo.h"
  18. #include "llvm/MC/MCObjectFileInfo.h"
  19. #include "llvm/MC/MCParser/AsmLexer.h"
  20. #include "llvm/MC/MCParser/MCTargetAsmParser.h"
  21. #include "llvm/MC/MCRegisterInfo.h"
  22. #include "llvm/MC/MCSectionMachO.h"
  23. #include "llvm/MC/MCStreamer.h"
  24. #include "llvm/MC/MCSubtargetInfo.h"
  25. #include "llvm/MC/MCTargetOptionsCommandFlags.h"
  26. #include "llvm/Support/MemoryBuffer.h"
  27. #include "llvm/Support/CommandLine.h"
  28. #include "llvm/Support/FileUtilities.h"
  29. #include "llvm/Support/raw_ostream.h"
  30. #include "llvm/Support/SourceMgr.h"
  31. #include "llvm/Support/TargetSelect.h"
  32. #include "llvm/Support/TargetRegistry.h"
  33. #include "llvm/Support/ToolOutputFile.h"
  34. using namespace llvm;
  35. static cl::opt<std::string>
  36. TripleName("triple", cl::desc("Target triple to assemble for, "
  37. "see -version for available targets"));
  38. static cl::opt<std::string>
  39. MCPU("mcpu",
  40. cl::desc("Target a specific cpu type (-mcpu=help for details)"),
  41. cl::value_desc("cpu-name"), cl::init(""));
  42. // This is useful for variable-length instruction sets.
  43. static cl::opt<unsigned> InsnLimit(
  44. "insn-limit",
  45. cl::desc("Limit the number of instructions to process (0 for no limit)"),
  46. cl::value_desc("count"), cl::init(0));
  47. static cl::list<std::string>
  48. MAttrs("mattr", cl::CommaSeparated,
  49. cl::desc("Target specific attributes (-mattr=help for details)"),
  50. cl::value_desc("a1,+a2,-a3,..."));
  51. // The feature string derived from -mattr's values.
  52. std::string FeaturesStr;
  53. static cl::list<std::string>
  54. FuzzerArgs("fuzzer-args", cl::Positional,
  55. cl::desc("Options to pass to the fuzzer"), cl::ZeroOrMore,
  56. cl::PositionalEatsArgs);
  57. static std::vector<char *> ModifiedArgv;
  58. enum OutputFileType {
  59. OFT_Null,
  60. OFT_AssemblyFile,
  61. OFT_ObjectFile
  62. };
  63. static cl::opt<OutputFileType>
  64. FileType("filetype", cl::init(OFT_AssemblyFile),
  65. cl::desc("Choose an output file type:"),
  66. cl::values(
  67. clEnumValN(OFT_AssemblyFile, "asm",
  68. "Emit an assembly ('.s') file"),
  69. clEnumValN(OFT_Null, "null",
  70. "Don't emit anything (for timing purposes)"),
  71. clEnumValN(OFT_ObjectFile, "obj",
  72. "Emit a native object ('.o') file")));
  73. class LLVMFuzzerInputBuffer : public MemoryBuffer
  74. {
  75. public:
  76. LLVMFuzzerInputBuffer(const uint8_t *data_, size_t size_)
  77. : Data(reinterpret_cast<const char *>(data_)),
  78. Size(size_) {
  79. init(Data, Data+Size, false);
  80. }
  81. virtual BufferKind getBufferKind() const {
  82. return MemoryBuffer_Malloc; // it's not disk-backed so I think that's
  83. // the intent ... though AFAIK it
  84. // probably came from an mmap or sbrk
  85. }
  86. private:
  87. const char *Data;
  88. size_t Size;
  89. };
  90. static int AssembleInput(const char *ProgName, const Target *TheTarget,
  91. SourceMgr &SrcMgr, MCContext &Ctx, MCStreamer &Str,
  92. MCAsmInfo &MAI, MCSubtargetInfo &STI,
  93. MCInstrInfo &MCII, MCTargetOptions &MCOptions) {
  94. static const bool NoInitialTextSection = false;
  95. std::unique_ptr<MCAsmParser> Parser(
  96. createMCAsmParser(SrcMgr, Ctx, Str, MAI));
  97. std::unique_ptr<MCTargetAsmParser> TAP(
  98. TheTarget->createMCAsmParser(STI, *Parser, MCII, MCOptions));
  99. if (!TAP) {
  100. errs() << ProgName
  101. << ": error: this target '" << TripleName
  102. << "', does not support assembly parsing.\n";
  103. abort();
  104. }
  105. Parser->setTargetParser(*TAP);
  106. return Parser->Run(NoInitialTextSection);
  107. }
  108. int AssembleOneInput(const uint8_t *Data, size_t Size) {
  109. const bool ShowInst = false;
  110. const bool AsmVerbose = false;
  111. const bool UseDwarfDirectory = true;
  112. Triple TheTriple(Triple::normalize(TripleName));
  113. SourceMgr SrcMgr;
  114. std::unique_ptr<MemoryBuffer> BufferPtr(new LLVMFuzzerInputBuffer(Data, Size));
  115. // Tell SrcMgr about this buffer, which is what the parser will pick up.
  116. SrcMgr.AddNewSourceBuffer(std::move(BufferPtr), SMLoc());
  117. static const std::vector<std::string> NoIncludeDirs;
  118. SrcMgr.setIncludeDirs(NoIncludeDirs);
  119. static std::string ArchName;
  120. std::string Error;
  121. const Target *TheTarget = TargetRegistry::lookupTarget(ArchName, TheTriple,
  122. Error);
  123. if (!TheTarget) {
  124. errs() << "error: this target '" << TheTriple.normalize()
  125. << "/" << ArchName << "', was not found: '" << Error << "'\n";
  126. abort();
  127. }
  128. std::unique_ptr<MCRegisterInfo> MRI(TheTarget->createMCRegInfo(TripleName));
  129. if (!MRI) {
  130. errs() << "Unable to create target register info!";
  131. abort();
  132. }
  133. std::unique_ptr<MCAsmInfo> MAI(TheTarget->createMCAsmInfo(*MRI, TripleName));
  134. if (!MAI) {
  135. errs() << "Unable to create target asm info!";
  136. abort();
  137. }
  138. MCObjectFileInfo MOFI;
  139. MCContext Ctx(MAI.get(), MRI.get(), &MOFI, &SrcMgr);
  140. static const bool UsePIC = false;
  141. MOFI.InitMCObjectFileInfo(TheTriple, UsePIC, Ctx);
  142. const unsigned OutputAsmVariant = 0;
  143. std::unique_ptr<MCInstrInfo> MCII(TheTarget->createMCInstrInfo());
  144. MCInstPrinter *IP = TheTarget->createMCInstPrinter(Triple(TripleName), OutputAsmVariant,
  145. *MAI, *MCII, *MRI);
  146. if (!IP) {
  147. errs()
  148. << "error: unable to create instruction printer for target triple '"
  149. << TheTriple.normalize() << "' with assembly variant "
  150. << OutputAsmVariant << ".\n";
  151. abort();
  152. }
  153. const char *ProgName = "llvm-mc-fuzzer";
  154. std::unique_ptr<MCSubtargetInfo> STI(
  155. TheTarget->createMCSubtargetInfo(TripleName, MCPU, FeaturesStr));
  156. MCCodeEmitter *CE = nullptr;
  157. MCAsmBackend *MAB = nullptr;
  158. MCTargetOptions MCOptions = InitMCTargetOptionsFromFlags();
  159. std::string OutputString;
  160. raw_string_ostream Out(OutputString);
  161. auto FOut = llvm::make_unique<formatted_raw_ostream>(Out);
  162. std::unique_ptr<MCStreamer> Str;
  163. if (FileType == OFT_AssemblyFile) {
  164. Str.reset(TheTarget->createAsmStreamer(
  165. Ctx, std::move(FOut), AsmVerbose,
  166. UseDwarfDirectory, IP, CE, MAB, ShowInst));
  167. } else {
  168. assert(FileType == OFT_ObjectFile && "Invalid file type!");
  169. std::error_code EC;
  170. const std::string OutputFilename = "-";
  171. auto Out =
  172. llvm::make_unique<ToolOutputFile>(OutputFilename, EC, sys::fs::F_None);
  173. if (EC) {
  174. errs() << EC.message() << '\n';
  175. abort();
  176. }
  177. // Don't waste memory on names of temp labels.
  178. Ctx.setUseNamesOnTempLabels(false);
  179. std::unique_ptr<buffer_ostream> BOS;
  180. raw_pwrite_stream *OS = &Out->os();
  181. if (!Out->os().supportsSeeking()) {
  182. BOS = make_unique<buffer_ostream>(Out->os());
  183. OS = BOS.get();
  184. }
  185. MCCodeEmitter *CE = TheTarget->createMCCodeEmitter(*MCII, *MRI, Ctx);
  186. MCAsmBackend *MAB = TheTarget->createMCAsmBackend(*MRI, TripleName, MCPU,
  187. MCOptions);
  188. Str.reset(TheTarget->createMCObjectStreamer(
  189. TheTriple, Ctx, *MAB, *OS, CE, *STI, MCOptions.MCRelaxAll,
  190. MCOptions.MCIncrementalLinkerCompatible,
  191. /*DWARFMustBeAtTheEnd*/ false));
  192. }
  193. const int Res = AssembleInput(ProgName, TheTarget, SrcMgr, Ctx, *Str, *MAI, *STI,
  194. *MCII, MCOptions);
  195. (void) Res;
  196. return 0;
  197. }
  198. extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data, size_t Size) {
  199. return AssembleOneInput(Data, Size);
  200. }
  201. extern "C" LLVM_ATTRIBUTE_USED int LLVMFuzzerInitialize(int *argc,
  202. char ***argv) {
  203. // The command line is unusual compared to other fuzzers due to the need to
  204. // specify the target. Options like -triple, -mcpu, and -mattr work like
  205. // their counterparts in llvm-mc, while -fuzzer-args collects options for the
  206. // fuzzer itself.
  207. //
  208. // Examples:
  209. //
  210. // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to
  211. // 4-bytes each and use the contents of ./corpus as the test corpus:
  212. // llvm-mc-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 -disassemble \
  213. // -fuzzer-args -max_len=4 -runs=100000 ./corpus
  214. //
  215. // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA
  216. // feature enabled using up to 64-byte inputs:
  217. // llvm-mc-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 -mattr=msa \
  218. // -disassemble -fuzzer-args ./corpus
  219. //
  220. // If your aim is to find instructions that are not tested, then it is
  221. // advisable to constrain the maximum input size to a single instruction
  222. // using -max_len as in the first example. This results in a test corpus of
  223. // individual instructions that test unique paths. Without this constraint,
  224. // there will be considerable redundancy in the corpus.
  225. char **OriginalArgv = *argv;
  226. LLVMInitializeAllTargetInfos();
  227. LLVMInitializeAllTargetMCs();
  228. LLVMInitializeAllAsmParsers();
  229. cl::ParseCommandLineOptions(*argc, OriginalArgv);
  230. // Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that
  231. // the driver can parse its arguments.
  232. //
  233. // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs.
  234. // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a
  235. // non-const buffer to avoid the need to clean up when the fuzzer terminates.
  236. ModifiedArgv.push_back(OriginalArgv[0]);
  237. for (const auto &FuzzerArg : FuzzerArgs) {
  238. for (int i = 1; i < *argc; ++i) {
  239. if (FuzzerArg == OriginalArgv[i])
  240. ModifiedArgv.push_back(OriginalArgv[i]);
  241. }
  242. }
  243. *argc = ModifiedArgv.size();
  244. *argv = ModifiedArgv.data();
  245. // Package up features to be passed to target/subtarget
  246. // We have to pass it via a global since the callback doesn't
  247. // permit any user data.
  248. if (MAttrs.size()) {
  249. SubtargetFeatures Features;
  250. for (unsigned i = 0; i != MAttrs.size(); ++i)
  251. Features.AddFeature(MAttrs[i]);
  252. FeaturesStr = Features.getString();
  253. }
  254. if (TripleName.empty())
  255. TripleName = sys::getDefaultTargetTriple();
  256. return 0;
  257. }