123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135 |
- //===-- X86Disassembler.h - Disassembler for x86 and x86_64 -----*- C++ -*-===//
- //
- // The LLVM Compiler Infrastructure
- //
- // This file is distributed under the University of Illinois Open Source
- // License. See LICENSE.TXT for details.
- //
- //===----------------------------------------------------------------------===//
- //
- // The X86 disassembler is a table-driven disassembler for the 16-, 32-, and
- // 64-bit X86 instruction sets. The main decode sequence for an assembly
- // instruction in this disassembler is:
- //
- // 1. Read the prefix bytes and determine the attributes of the instruction.
- // These attributes, recorded in enum attributeBits
- // (X86DisassemblerDecoderCommon.h), form a bitmask. The table CONTEXTS_SYM
- // provides a mapping from bitmasks to contexts, which are represented by
- // enum InstructionContext (ibid.).
- //
- // 2. Read the opcode, and determine what kind of opcode it is. The
- // disassembler distinguishes four kinds of opcodes, which are enumerated in
- // OpcodeType (X86DisassemblerDecoderCommon.h): one-byte (0xnn), two-byte
- // (0x0f 0xnn), three-byte-38 (0x0f 0x38 0xnn), or three-byte-3a
- // (0x0f 0x3a 0xnn). Mandatory prefixes are treated as part of the context.
- //
- // 3. Depending on the opcode type, look in one of four ClassDecision structures
- // (X86DisassemblerDecoderCommon.h). Use the opcode class to determine which
- // OpcodeDecision (ibid.) to look the opcode up in. Look up the opcode to get
- // a ModRMDecision (ibid.).
- //
- // 4. Some instructions, such as escape opcodes or extended opcodes, or even
- // instructions that have ModRM*Reg / ModRM*Mem forms in LLVM, need the
- // ModR/M byte to complete decode. The ModRMDecision's type is an entry from
- // ModRMDecisionType (X86DisassemblerDecoderCommon.h) that indicates if the
- // ModR/M byte is required and how to interpret it.
- //
- // 5. After resolving the ModRMDecision, the disassembler has a unique ID
- // of type InstrUID (X86DisassemblerDecoderCommon.h). Looking this ID up in
- // INSTRUCTIONS_SYM yields the name of the instruction and the encodings and
- // meanings of its operands.
- //
- // 6. For each operand, its encoding is an entry from OperandEncoding
- // (X86DisassemblerDecoderCommon.h) and its type is an entry from
- // OperandType (ibid.). The encoding indicates how to read it from the
- // instruction; the type indicates how to interpret the value once it has
- // been read. For example, a register operand could be stored in the R/M
- // field of the ModR/M byte, the REG field of the ModR/M byte, or added to
- // the main opcode. This is orthogonal to its meaning (a GPR or an XMM
- // register, for instance). Given this information, the operands can be
- // extracted and interpreted.
- //
- // 7. As the last step, the disassembler translates the instruction information
- // and operands into a format understandable by the client - in this case, an
- // MCInst for use by the MC infrastructure.
- //
- // The disassembler is broken broadly into two parts: the table emitter that
- // emits the instruction decode tables discussed above during compilation, and
- // the disassembler itself. The table emitter is documented in more detail in
- // utils/TableGen/X86DisassemblerEmitter.h.
- //
- // X86Disassembler.h contains the public interface for the disassembler,
- // adhering to the MCDisassembler interface.
- // X86Disassembler.cpp contains the code responsible for step 7, and for
- // invoking the decoder to execute steps 1-6.
- // X86DisassemblerDecoderCommon.h contains the definitions needed by both the
- // table emitter and the disassembler.
- // X86DisassemblerDecoder.h contains the public interface of the decoder,
- // factored out into C for possible use by other projects.
- // X86DisassemblerDecoder.c contains the source code of the decoder, which is
- // responsible for steps 1-6.
- //
- //===----------------------------------------------------------------------===//
- #ifndef X86DISASSEMBLER_H
- #define X86DISASSEMBLER_H
- #define INSTRUCTION_SPECIFIER_FIELDS \
- const char* name; // field text this client supplies: a C-string name member
- #define INSTRUCTION_IDS \
- unsigned instructionIDs; // field text this client supplies: an unsigned ID member
- #include "X86DisassemblerDecoderCommon.h" // included while both macros above are defined; NOTE(review): presumably the header expands them inside its struct definitions - confirm there
- #undef INSTRUCTION_SPECIFIER_FIELDS // both macros are removed again right after the include
- #undef INSTRUCTION_IDS
- #include "llvm/MC/MCDisassembler.h"
- namespace llvm {
-
- class MCInst;
- class MCInstrInfo;
- class MCSubtargetInfo;
- class MemoryObject;
- class raw_ostream;
- struct EDInstInfo;
-
- namespace X86Disassembler {
- /// X86GenericDisassembler - Generic disassembler for all X86 platforms.
- /// All a platform-specific class should have to do is subclass this one and
- /// pass a different DisassemblerMode value to the constructor.
- class X86GenericDisassembler : public MCDisassembler {
-   /// Instruction info pointer.
-   /// NOTE(review): presumably set from the constructor's MII argument -
-   /// confirm in X86Disassembler.cpp.
-   const MCInstrInfo *MII;
- public:
-   /// Constructor - Initializes the disassembler.
-   ///
-   /// @param STI  - The subtarget info for this disassembler.
-   /// @param mode - The X86 architecture mode to decode for.
-   /// @param MII  - The instruction info; see the MII member.
-   X86GenericDisassembler(const MCSubtargetInfo &STI, DisassemblerMode mode,
-                          const MCInstrInfo *MII);
- private:
-   /// Destructor - Deliberately declared private.
-   /// NOTE(review): presumably so that instances are only destroyed through
-   /// the MCDisassembler base class - confirm against MCDisassembler.h.
-   ~X86GenericDisassembler();
- public:
-   /// getInstruction - See MCDisassembler.
-   DecodeStatus getInstruction(MCInst &instr,
-                               uint64_t &size,
-                               const MemoryObject &region,
-                               uint64_t address,
-                               raw_ostream &vStream,
-                               raw_ostream &cStream) const;
-   /// getEDInfo - See MCDisassembler.
-   const EDInstInfo *getEDInfo() const;
- private:
-   /// The X86 architecture mode.
-   /// NOTE(review): presumably the mode value passed to the constructor -
-   /// confirm in X86Disassembler.cpp.
-   DisassemblerMode fMode;
- };
- } // namespace X86Disassembler
- } // namespace llvm
- #endif
|