123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138 |
- //===-- MCFunction.cpp ----------------------------------------------------===//
- //
- // The LLVM Compiler Infrastructure
- //
- // This file is distributed under the University of Illinois Open Source
- // License. See LICENSE.TXT for details.
- //
- //===----------------------------------------------------------------------===//
- //
- // This file defines the algorithm to break down a region of machine code
- // into basic blocks and try to reconstruct a CFG from it.
- //
- //===----------------------------------------------------------------------===//
- #include "MCFunction.h"
- #include "llvm/ADT/STLExtras.h"
- #include "llvm/MC/MCDisassembler.h"
- #include "llvm/MC/MCInst.h"
- #include "llvm/MC/MCInstPrinter.h"
- #include "llvm/MC/MCInstrAnalysis.h"
- #include "llvm/MC/MCInstrDesc.h"
- #include "llvm/MC/MCInstrInfo.h"
- #include "llvm/Support/MemoryObject.h"
- #include "llvm/Support/raw_ostream.h"
- #include "llvm/Support/system_error.h"
- #include <set>
- using namespace llvm;
- MCFunction
- MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm,
- const MemoryObject &Region, uint64_t Start,
- uint64_t End, const MCInstrAnalysis *Ana,
- raw_ostream &DebugOut,
- SmallVectorImpl<uint64_t> &Calls) {
- std::vector<MCDecodedInst> Instructions;
- std::set<uint64_t> Splits;
- Splits.insert(Start);
- uint64_t Size;
- MCFunction f(Name);
- {
- DenseSet<uint64_t> VisitedInsts;
- SmallVector<uint64_t, 16> WorkList;
- WorkList.push_back(Start);
- // Disassemble code and gather basic block split points.
- while (!WorkList.empty()) {
- uint64_t Index = WorkList.pop_back_val();
- if (VisitedInsts.find(Index) != VisitedInsts.end())
- continue; // Already visited this location.
- for (;Index < End; Index += Size) {
- VisitedInsts.insert(Index);
- MCInst Inst;
- if (DisAsm->getInstruction(Inst, Size, Region, Index, DebugOut, nulls())){
- Instructions.push_back(MCDecodedInst(Index, Size, Inst));
- if (Ana->isBranch(Inst)) {
- uint64_t targ = Ana->evaluateBranch(Inst, Index, Size);
- if (targ != -1ULL && targ == Index+Size)
- continue; // Skip nop jumps.
- // If we could determine the branch target, make a note to start a
- // new basic block there and add the target to the worklist.
- if (targ != -1ULL) {
- Splits.insert(targ);
- WorkList.push_back(targ);
- WorkList.push_back(Index+Size);
- }
- Splits.insert(Index+Size);
- break;
- } else if (Ana->isReturn(Inst)) {
- // Return instruction. This basic block ends here.
- Splits.insert(Index+Size);
- break;
- } else if (Ana->isCall(Inst)) {
- uint64_t targ = Ana->evaluateBranch(Inst, Index, Size);
- // Add the call to the call list if the destination is known.
- if (targ != -1ULL && targ != Index+Size)
- Calls.push_back(targ);
- }
- } else {
- errs().write_hex(Index) << ": warning: invalid instruction encoding\n";
- if (Size == 0)
- Size = 1; // skip illegible bytes
- }
- }
- }
- }
- // Make sure the instruction list is sorted.
- std::sort(Instructions.begin(), Instructions.end());
- // Create basic blocks.
- unsigned ii = 0, ie = Instructions.size();
- for (std::set<uint64_t>::iterator spi = Splits.begin(),
- spe = llvm::prior(Splits.end()); spi != spe; ++spi) {
- MCBasicBlock BB;
- uint64_t BlockEnd = *llvm::next(spi);
- // Add instructions to the BB.
- for (; ii != ie; ++ii) {
- if (Instructions[ii].Address < *spi ||
- Instructions[ii].Address >= BlockEnd)
- break;
- BB.addInst(Instructions[ii]);
- }
- f.addBlock(*spi, BB);
- }
- std::sort(f.Blocks.begin(), f.Blocks.end());
- // Calculate successors of each block.
- for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
- MCBasicBlock &BB = const_cast<MCBasicBlock&>(i->second);
- if (BB.getInsts().empty()) continue;
- const MCDecodedInst &Inst = BB.getInsts().back();
- if (Ana->isBranch(Inst.Inst)) {
- uint64_t targ = Ana->evaluateBranch(Inst.Inst, Inst.Address, Inst.Size);
- if (targ == -1ULL) {
- // Indirect branch. Bail and add all blocks of the function as a
- // successor.
- for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i)
- BB.addSucc(i->first);
- } else if (targ != Inst.Address+Inst.Size)
- BB.addSucc(targ);
- // Conditional branches can also fall through to the next block.
- if (Ana->isConditionalBranch(Inst.Inst) && llvm::next(i) != e)
- BB.addSucc(llvm::next(i)->first);
- } else {
- // No branch. Fall through to the next block.
- if (!Ana->isReturn(Inst.Inst) && llvm::next(i) != e)
- BB.addSucc(llvm::next(i)->first);
- }
- }
- return f;
- }
|