MCFunction.cpp 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138
  1. //===-- MCFunction.cpp ----------------------------------------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file defines the algorithm to break down a region of machine code
  11. // into basic blocks and try to reconstruct a CFG from it.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "MCFunction.h"
  15. #include "llvm/ADT/STLExtras.h"
  16. #include "llvm/MC/MCDisassembler.h"
  17. #include "llvm/MC/MCInst.h"
  18. #include "llvm/MC/MCInstPrinter.h"
  19. #include "llvm/MC/MCInstrAnalysis.h"
  20. #include "llvm/MC/MCInstrDesc.h"
  21. #include "llvm/MC/MCInstrInfo.h"
  22. #include "llvm/Support/MemoryObject.h"
  23. #include "llvm/Support/raw_ostream.h"
  24. #include "llvm/Support/system_error.h"
  25. #include <set>
  26. using namespace llvm;
  27. MCFunction
  28. MCFunction::createFunctionFromMC(StringRef Name, const MCDisassembler *DisAsm,
  29. const MemoryObject &Region, uint64_t Start,
  30. uint64_t End, const MCInstrAnalysis *Ana,
  31. raw_ostream &DebugOut,
  32. SmallVectorImpl<uint64_t> &Calls) {
  33. std::vector<MCDecodedInst> Instructions;
  34. std::set<uint64_t> Splits;
  35. Splits.insert(Start);
  36. uint64_t Size;
  37. MCFunction f(Name);
  38. {
  39. DenseSet<uint64_t> VisitedInsts;
  40. SmallVector<uint64_t, 16> WorkList;
  41. WorkList.push_back(Start);
  42. // Disassemble code and gather basic block split points.
  43. while (!WorkList.empty()) {
  44. uint64_t Index = WorkList.pop_back_val();
  45. if (VisitedInsts.find(Index) != VisitedInsts.end())
  46. continue; // Already visited this location.
  47. for (;Index < End; Index += Size) {
  48. VisitedInsts.insert(Index);
  49. MCInst Inst;
  50. if (DisAsm->getInstruction(Inst, Size, Region, Index, DebugOut, nulls())){
  51. Instructions.push_back(MCDecodedInst(Index, Size, Inst));
  52. if (Ana->isBranch(Inst)) {
  53. uint64_t targ = Ana->evaluateBranch(Inst, Index, Size);
  54. if (targ != -1ULL && targ == Index+Size)
  55. continue; // Skip nop jumps.
  56. // If we could determine the branch target, make a note to start a
  57. // new basic block there and add the target to the worklist.
  58. if (targ != -1ULL) {
  59. Splits.insert(targ);
  60. WorkList.push_back(targ);
  61. WorkList.push_back(Index+Size);
  62. }
  63. Splits.insert(Index+Size);
  64. break;
  65. } else if (Ana->isReturn(Inst)) {
  66. // Return instruction. This basic block ends here.
  67. Splits.insert(Index+Size);
  68. break;
  69. } else if (Ana->isCall(Inst)) {
  70. uint64_t targ = Ana->evaluateBranch(Inst, Index, Size);
  71. // Add the call to the call list if the destination is known.
  72. if (targ != -1ULL && targ != Index+Size)
  73. Calls.push_back(targ);
  74. }
  75. } else {
  76. errs().write_hex(Index) << ": warning: invalid instruction encoding\n";
  77. if (Size == 0)
  78. Size = 1; // skip illegible bytes
  79. }
  80. }
  81. }
  82. }
  83. // Make sure the instruction list is sorted.
  84. std::sort(Instructions.begin(), Instructions.end());
  85. // Create basic blocks.
  86. unsigned ii = 0, ie = Instructions.size();
  87. for (std::set<uint64_t>::iterator spi = Splits.begin(),
  88. spe = llvm::prior(Splits.end()); spi != spe; ++spi) {
  89. MCBasicBlock BB;
  90. uint64_t BlockEnd = *llvm::next(spi);
  91. // Add instructions to the BB.
  92. for (; ii != ie; ++ii) {
  93. if (Instructions[ii].Address < *spi ||
  94. Instructions[ii].Address >= BlockEnd)
  95. break;
  96. BB.addInst(Instructions[ii]);
  97. }
  98. f.addBlock(*spi, BB);
  99. }
  100. std::sort(f.Blocks.begin(), f.Blocks.end());
  101. // Calculate successors of each block.
  102. for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i) {
  103. MCBasicBlock &BB = const_cast<MCBasicBlock&>(i->second);
  104. if (BB.getInsts().empty()) continue;
  105. const MCDecodedInst &Inst = BB.getInsts().back();
  106. if (Ana->isBranch(Inst.Inst)) {
  107. uint64_t targ = Ana->evaluateBranch(Inst.Inst, Inst.Address, Inst.Size);
  108. if (targ == -1ULL) {
  109. // Indirect branch. Bail and add all blocks of the function as a
  110. // successor.
  111. for (MCFunction::iterator i = f.begin(), e = f.end(); i != e; ++i)
  112. BB.addSucc(i->first);
  113. } else if (targ != Inst.Address+Inst.Size)
  114. BB.addSucc(targ);
  115. // Conditional branches can also fall through to the next block.
  116. if (Ana->isConditionalBranch(Inst.Inst) && llvm::next(i) != e)
  117. BB.addSucc(llvm::next(i)->first);
  118. } else {
  119. // No branch. Fall through to the next block.
  120. if (!Ana->isReturn(Inst.Inst) && llvm::next(i) != e)
  121. BB.addSucc(llvm::next(i)->first);
  122. }
  123. }
  124. return f;
  125. }