EDDisassembler.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392
  1. //===-EDDisassembler.cpp - LLVM Enhanced Disassembler ---------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file implements the Enhanced Disassembly library's disassembler class.
  11. // The disassembler is responsible for vending individual instructions according
  12. // to a given architecture and disassembly syntax.
  13. //
  14. //===----------------------------------------------------------------------===//
  15. #include "EDDisassembler.h"
  16. #include "EDInst.h"
  17. #include "llvm/MC/EDInstInfo.h"
  18. #include "llvm/MC/MCAsmInfo.h"
  19. #include "llvm/MC/MCContext.h"
  20. #include "llvm/MC/MCDisassembler.h"
  21. #include "llvm/MC/MCExpr.h"
  22. #include "llvm/MC/MCInst.h"
  23. #include "llvm/MC/MCInstPrinter.h"
  24. #include "llvm/MC/MCRegisterInfo.h"
  25. #include "llvm/MC/MCStreamer.h"
  26. #include "llvm/MC/MCSubtargetInfo.h"
  27. #include "llvm/MC/MCParser/AsmLexer.h"
  28. #include "llvm/MC/MCParser/MCAsmParser.h"
  29. #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
  30. #include "llvm/MC/MCTargetAsmLexer.h"
  31. #include "llvm/MC/MCTargetAsmParser.h"
  32. #include "llvm/Support/MemoryBuffer.h"
  33. #include "llvm/Support/MemoryObject.h"
  34. #include "llvm/Support/SourceMgr.h"
  35. #include "llvm/Support/TargetRegistry.h"
  36. using namespace llvm;
  37. EDDisassembler::DisassemblerMap_t EDDisassembler::sDisassemblers;
  38. struct TripleMap {
  39. Triple::ArchType Arch;
  40. const char *String;
  41. };
  42. static struct TripleMap triplemap[] = {
  43. { Triple::x86, "i386-unknown-unknown" },
  44. { Triple::x86_64, "x86_64-unknown-unknown" },
  45. { Triple::arm, "arm-unknown-unknown" },
  46. { Triple::thumb, "thumb-unknown-unknown" }
  47. };
  48. /// infoFromArch - Returns the TripleMap corresponding to a given architecture,
  49. /// or NULL if there is an error
  50. ///
  51. /// @arg arch - The Triple::ArchType for the desired architecture
  52. static const char *tripleFromArch(Triple::ArchType arch) {
  53. unsigned int infoIndex;
  54. for (infoIndex = 0; triplemap[infoIndex].String != NULL; ++infoIndex) {
  55. if (arch == triplemap[infoIndex].Arch)
  56. return triplemap[infoIndex].String;
  57. }
  58. return NULL;
  59. }
  60. /// getLLVMSyntaxVariant - gets the constant to use to get an assembly printer
  61. /// for the desired assembly syntax, suitable for passing to
  62. /// Target::createMCInstPrinter()
  63. ///
  64. /// @arg arch - The target architecture
  65. /// @arg syntax - The assembly syntax in sd form
  66. static int getLLVMSyntaxVariant(Triple::ArchType arch,
  67. EDDisassembler::AssemblySyntax syntax) {
  68. switch (syntax) {
  69. // Mappings below from X86AsmPrinter.cpp
  70. case EDDisassembler::kEDAssemblySyntaxX86ATT:
  71. if (arch == Triple::x86 || arch == Triple::x86_64)
  72. return 0;
  73. break;
  74. case EDDisassembler::kEDAssemblySyntaxX86Intel:
  75. if (arch == Triple::x86 || arch == Triple::x86_64)
  76. return 1;
  77. break;
  78. case EDDisassembler::kEDAssemblySyntaxARMUAL:
  79. if (arch == Triple::arm || arch == Triple::thumb)
  80. return 0;
  81. break;
  82. }
  83. return -1;
  84. }
  85. EDDisassembler *EDDisassembler::getDisassembler(Triple::ArchType arch,
  86. AssemblySyntax syntax) {
  87. const char *triple = tripleFromArch(arch);
  88. return getDisassembler(StringRef(triple), syntax);
  89. }
  90. EDDisassembler *EDDisassembler::getDisassembler(StringRef str,
  91. AssemblySyntax syntax) {
  92. CPUKey key;
  93. key.Triple = str.str();
  94. key.Syntax = syntax;
  95. EDDisassembler::DisassemblerMap_t::iterator i = sDisassemblers.find(key);
  96. if (i != sDisassemblers.end()) {
  97. return i->second;
  98. }
  99. EDDisassembler *sdd = new EDDisassembler(key);
  100. if (!sdd->valid()) {
  101. delete sdd;
  102. return NULL;
  103. }
  104. sDisassemblers[key] = sdd;
  105. return sdd;
  106. }
  107. EDDisassembler::EDDisassembler(CPUKey &key) :
  108. Valid(false),
  109. HasSemantics(false),
  110. ErrorStream(nulls()),
  111. Key(key),
  112. TgtTriple(key.Triple.c_str()) {
  113. LLVMSyntaxVariant = getLLVMSyntaxVariant(TgtTriple.getArch(), key.Syntax);
  114. if (LLVMSyntaxVariant < 0)
  115. return;
  116. std::string tripleString(key.Triple);
  117. std::string errorString;
  118. Tgt = TargetRegistry::lookupTarget(key.Triple,
  119. errorString);
  120. if (!Tgt)
  121. return;
  122. MRI.reset(Tgt->createMCRegInfo(tripleString));
  123. if (!MRI)
  124. return;
  125. initMaps(*MRI);
  126. AsmInfo.reset(Tgt->createMCAsmInfo(tripleString));
  127. if (!AsmInfo)
  128. return;
  129. STI.reset(Tgt->createMCSubtargetInfo(tripleString, "", ""));
  130. if (!STI)
  131. return;
  132. Disassembler.reset(Tgt->createMCDisassembler(*STI));
  133. if (!Disassembler)
  134. return;
  135. InstInfos = Disassembler->getEDInfo();
  136. InstString.reset(new std::string);
  137. InstStream.reset(new raw_string_ostream(*InstString));
  138. InstPrinter.reset(Tgt->createMCInstPrinter(LLVMSyntaxVariant, *AsmInfo, *STI));
  139. if (!InstPrinter)
  140. return;
  141. GenericAsmLexer.reset(new AsmLexer(*AsmInfo));
  142. SpecificAsmLexer.reset(Tgt->createMCAsmLexer(*MRI, *AsmInfo));
  143. SpecificAsmLexer->InstallLexer(*GenericAsmLexer);
  144. initMaps(*MRI);
  145. Valid = true;
  146. }
  147. EDDisassembler::~EDDisassembler() {
  148. if (!valid())
  149. return;
  150. }
  151. namespace {
  152. /// EDMemoryObject - a subclass of MemoryObject that allows use of a callback
  153. /// as provided by the sd interface. See MemoryObject.
  154. class EDMemoryObject : public llvm::MemoryObject {
  155. private:
  156. EDByteReaderCallback Callback;
  157. void *Arg;
  158. public:
  159. EDMemoryObject(EDByteReaderCallback callback,
  160. void *arg) : Callback(callback), Arg(arg) { }
  161. ~EDMemoryObject() { }
  162. uint64_t getBase() const { return 0x0; }
  163. uint64_t getExtent() const { return (uint64_t)-1; }
  164. int readByte(uint64_t address, uint8_t *ptr) const {
  165. if (!Callback)
  166. return -1;
  167. if (Callback(ptr, address, Arg))
  168. return -1;
  169. return 0;
  170. }
  171. };
  172. }
  173. EDInst *EDDisassembler::createInst(EDByteReaderCallback byteReader,
  174. uint64_t address,
  175. void *arg) {
  176. EDMemoryObject memoryObject(byteReader, arg);
  177. MCInst* inst = new MCInst;
  178. uint64_t byteSize;
  179. MCDisassembler::DecodeStatus S;
  180. S = Disassembler->getInstruction(*inst, byteSize, memoryObject, address,
  181. ErrorStream, nulls());
  182. switch (S) {
  183. case MCDisassembler::Fail:
  184. case MCDisassembler::SoftFail:
  185. // FIXME: Do something different on soft failure mode?
  186. delete inst;
  187. return NULL;
  188. case MCDisassembler::Success: {
  189. const llvm::EDInstInfo *thisInstInfo = NULL;
  190. if (InstInfos) {
  191. thisInstInfo = &InstInfos[inst->getOpcode()];
  192. }
  193. EDInst* sdInst = new EDInst(inst, byteSize, *this, thisInstInfo);
  194. return sdInst;
  195. }
  196. }
  197. return NULL;
  198. }
  199. void EDDisassembler::initMaps(const MCRegisterInfo &registerInfo) {
  200. unsigned numRegisters = registerInfo.getNumRegs();
  201. unsigned registerIndex;
  202. for (registerIndex = 0; registerIndex < numRegisters; ++registerIndex) {
  203. const char* registerName = registerInfo.get(registerIndex).Name;
  204. RegVec.push_back(registerName);
  205. RegRMap[registerName] = registerIndex;
  206. }
  207. switch (TgtTriple.getArch()) {
  208. default:
  209. break;
  210. case Triple::x86:
  211. case Triple::x86_64:
  212. stackPointers.insert(registerIDWithName("SP"));
  213. stackPointers.insert(registerIDWithName("ESP"));
  214. stackPointers.insert(registerIDWithName("RSP"));
  215. programCounters.insert(registerIDWithName("IP"));
  216. programCounters.insert(registerIDWithName("EIP"));
  217. programCounters.insert(registerIDWithName("RIP"));
  218. break;
  219. case Triple::arm:
  220. case Triple::thumb:
  221. stackPointers.insert(registerIDWithName("SP"));
  222. programCounters.insert(registerIDWithName("PC"));
  223. break;
  224. }
  225. }
  226. const char *EDDisassembler::nameWithRegisterID(unsigned registerID) const {
  227. if (registerID >= RegVec.size())
  228. return NULL;
  229. else
  230. return RegVec[registerID].c_str();
  231. }
  232. unsigned EDDisassembler::registerIDWithName(const char *name) const {
  233. regrmap_t::const_iterator iter = RegRMap.find(std::string(name));
  234. if (iter == RegRMap.end())
  235. return 0;
  236. else
  237. return (*iter).second;
  238. }
  239. bool EDDisassembler::registerIsStackPointer(unsigned registerID) {
  240. return (stackPointers.find(registerID) != stackPointers.end());
  241. }
  242. bool EDDisassembler::registerIsProgramCounter(unsigned registerID) {
  243. return (programCounters.find(registerID) != programCounters.end());
  244. }
  245. int EDDisassembler::printInst(std::string &str, MCInst &inst) {
  246. PrinterMutex.acquire();
  247. InstPrinter->printInst(&inst, *InstStream, "");
  248. InstStream->flush();
  249. str = *InstString;
  250. InstString->clear();
  251. PrinterMutex.release();
  252. return 0;
  253. }
  254. static void diag_handler(const SMDiagnostic &diag, void *context) {
  255. if (context)
  256. diag.print("", static_cast<EDDisassembler*>(context)->ErrorStream);
  257. }
  258. int EDDisassembler::parseInst(SmallVectorImpl<MCParsedAsmOperand*> &operands,
  259. SmallVectorImpl<AsmToken> &tokens,
  260. const std::string &str) {
  261. int ret = 0;
  262. switch (TgtTriple.getArch()) {
  263. default:
  264. return -1;
  265. case Triple::x86:
  266. case Triple::x86_64:
  267. case Triple::arm:
  268. case Triple::thumb:
  269. break;
  270. }
  271. const char *cStr = str.c_str();
  272. MemoryBuffer *buf = MemoryBuffer::getMemBuffer(cStr, cStr + strlen(cStr));
  273. StringRef instName;
  274. SMLoc instLoc;
  275. SourceMgr sourceMgr;
  276. sourceMgr.setDiagHandler(diag_handler, static_cast<void*>(this));
  277. sourceMgr.AddNewSourceBuffer(buf, SMLoc()); // ownership of buf handed over
  278. MCContext context(*AsmInfo, *MRI, NULL);
  279. OwningPtr<MCStreamer> streamer(createNullStreamer(context));
  280. OwningPtr<MCAsmParser> genericParser(createMCAsmParser(sourceMgr,
  281. context, *streamer,
  282. *AsmInfo));
  283. OwningPtr<MCSubtargetInfo> STI(Tgt->createMCSubtargetInfo(Key.Triple.c_str(), "", ""));
  284. OwningPtr<MCTargetAsmParser>
  285. TargetParser(Tgt->createMCAsmParser(*STI, *genericParser));
  286. AsmToken OpcodeToken = genericParser->Lex();
  287. AsmToken NextToken = genericParser->Lex(); // consume next token, because specificParser expects us to
  288. if (OpcodeToken.is(AsmToken::Identifier)) {
  289. instName = OpcodeToken.getString();
  290. instLoc = OpcodeToken.getLoc();
  291. if (NextToken.isNot(AsmToken::Eof) &&
  292. TargetParser->ParseInstruction(instName, instLoc, operands))
  293. ret = -1;
  294. } else {
  295. ret = -1;
  296. }
  297. ParserMutex.acquire();
  298. if (!ret) {
  299. GenericAsmLexer->setBuffer(buf);
  300. while (SpecificAsmLexer->Lex(),
  301. SpecificAsmLexer->isNot(AsmToken::Eof) &&
  302. SpecificAsmLexer->isNot(AsmToken::EndOfStatement)) {
  303. if (SpecificAsmLexer->is(AsmToken::Error)) {
  304. ret = -1;
  305. break;
  306. }
  307. tokens.push_back(SpecificAsmLexer->getTok());
  308. }
  309. }
  310. ParserMutex.release();
  311. return ret;
  312. }
  313. int EDDisassembler::llvmSyntaxVariant() const {
  314. return LLVMSyntaxVariant;
  315. }