ReaderWrappers.cpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420
  1. //===- ReaderWrappers.cpp - Parse bytecode from file or buffer -----------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file was developed by the LLVM research group and is distributed under
  6. // the University of Illinois Open Source License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file implements loading and parsing a bytecode file and parsing a
  11. // bytecode module from a given buffer.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "llvm/Bytecode/Analyzer.h"
  15. #include "llvm/Bytecode/Reader.h"
  16. #include "Reader.h"
  17. #include "llvm/Module.h"
  18. #include "llvm/Instructions.h"
  19. #include "llvm/ADT/StringExtras.h"
  20. #include "llvm/System/MappedFile.h"
  21. #include <cerrno>
  22. #include <iostream>
  23. using namespace llvm;
  24. //===----------------------------------------------------------------------===//
  25. // BytecodeFileReader - Read from an mmap'able file descriptor.
  26. //
  27. namespace {
  28. /// BytecodeFileReader - parses a bytecode file from a file
  29. ///
  30. class BytecodeFileReader : public BytecodeReader {
  31. private:
  32. sys::MappedFile mapFile;
  33. BytecodeFileReader(const BytecodeFileReader&); // Do not implement
  34. void operator=(const BytecodeFileReader &BFR); // Do not implement
  35. public:
  36. BytecodeFileReader(const std::string &Filename, llvm::BytecodeHandler* H=0);
  37. };
  38. }
  39. BytecodeFileReader::BytecodeFileReader(const std::string &Filename,
  40. llvm::BytecodeHandler* H )
  41. : BytecodeReader(H)
  42. , mapFile( sys::Path(Filename))
  43. {
  44. mapFile.map();
  45. unsigned char* buffer = reinterpret_cast<unsigned char*>(mapFile.base());
  46. ParseBytecode(buffer, mapFile.size(), Filename);
  47. }
  48. //===----------------------------------------------------------------------===//
  49. // BytecodeBufferReader - Read from a memory buffer
  50. //
  51. namespace {
  52. /// BytecodeBufferReader - parses a bytecode file from a buffer
  53. ///
  54. class BytecodeBufferReader : public BytecodeReader {
  55. private:
  56. const unsigned char *Buffer;
  57. bool MustDelete;
  58. BytecodeBufferReader(const BytecodeBufferReader&); // Do not implement
  59. void operator=(const BytecodeBufferReader &BFR); // Do not implement
  60. public:
  61. BytecodeBufferReader(const unsigned char *Buf, unsigned Length,
  62. const std::string &ModuleID,
  63. llvm::BytecodeHandler* Handler = 0);
  64. ~BytecodeBufferReader();
  65. };
  66. }
  67. BytecodeBufferReader::BytecodeBufferReader(const unsigned char *Buf,
  68. unsigned Length,
  69. const std::string &ModuleID,
  70. llvm::BytecodeHandler* H )
  71. : BytecodeReader(H)
  72. {
  73. // If not aligned, allocate a new buffer to hold the bytecode...
  74. const unsigned char *ParseBegin = 0;
  75. if (reinterpret_cast<uint64_t>(Buf) & 3) {
  76. Buffer = new unsigned char[Length+4];
  77. unsigned Offset = 4 - ((intptr_t)Buffer & 3); // Make sure it's aligned
  78. ParseBegin = Buffer + Offset;
  79. memcpy((unsigned char*)ParseBegin, Buf, Length); // Copy it over
  80. MustDelete = true;
  81. } else {
  82. // If we don't need to copy it over, just use the caller's copy
  83. ParseBegin = Buffer = Buf;
  84. MustDelete = false;
  85. }
  86. try {
  87. ParseBytecode(ParseBegin, Length, ModuleID);
  88. } catch (...) {
  89. if (MustDelete) delete [] Buffer;
  90. throw;
  91. }
  92. }
  93. BytecodeBufferReader::~BytecodeBufferReader() {
  94. if (MustDelete) delete [] Buffer;
  95. }
  96. //===----------------------------------------------------------------------===//
  97. // BytecodeStdinReader - Read bytecode from Standard Input
  98. //
  99. namespace {
  100. /// BytecodeStdinReader - parses a bytecode file from stdin
  101. ///
  102. class BytecodeStdinReader : public BytecodeReader {
  103. private:
  104. std::vector<unsigned char> FileData;
  105. unsigned char *FileBuf;
  106. BytecodeStdinReader(const BytecodeStdinReader&); // Do not implement
  107. void operator=(const BytecodeStdinReader &BFR); // Do not implement
  108. public:
  109. BytecodeStdinReader( llvm::BytecodeHandler* H = 0 );
  110. };
  111. }
  112. BytecodeStdinReader::BytecodeStdinReader( BytecodeHandler* H )
  113. : BytecodeReader(H)
  114. {
  115. char Buffer[4096*4];
  116. // Read in all of the data from stdin, we cannot mmap stdin...
  117. while (std::cin.good()) {
  118. std::cin.read(Buffer, 4096*4);
  119. int BlockSize = std::cin.gcount();
  120. if (0 >= BlockSize)
  121. break;
  122. FileData.insert(FileData.end(), Buffer, Buffer+BlockSize);
  123. }
  124. if (FileData.empty())
  125. throw std::string("Standard Input empty!");
  126. FileBuf = &FileData[0];
  127. ParseBytecode(FileBuf, FileData.size(), "<stdin>");
  128. }
  129. //===----------------------------------------------------------------------===//
  130. // Varargs transmogrification code...
  131. //
  132. // CheckVarargs - This is used to automatically translate old-style varargs to
  133. // new style varargs for backwards compatibility.
  134. static ModuleProvider* CheckVarargs(ModuleProvider* MP) {
  135. Module* M = MP->getModule();
  136. // check to see if va_start takes arguements...
  137. Function* F = M->getNamedFunction("llvm.va_start");
  138. if(F == 0) return MP; //No varargs use, just return.
  139. if (F->getFunctionType()->getNumParams() == 1)
  140. return MP; // Modern varargs processing, just return.
  141. // If we get to this point, we know that we have an old-style module.
  142. // Materialize the whole thing to perform the rewriting.
  143. MP->materializeModule();
  144. if(Function* F = M->getNamedFunction("llvm.va_start")) {
  145. assert(F->arg_size() == 0 && "Obsolete va_start takes 0 argument!");
  146. //foo = va_start()
  147. // ->
  148. //bar = alloca typeof(foo)
  149. //va_start(bar)
  150. //foo = load bar
  151. const Type* RetTy = Type::getPrimitiveType(Type::VoidTyID);
  152. const Type* ArgTy = F->getFunctionType()->getReturnType();
  153. const Type* ArgTyPtr = PointerType::get(ArgTy);
  154. Function* NF = M->getOrInsertFunction("llvm.va_start",
  155. RetTy, ArgTyPtr, (Type *)0);
  156. for(Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E;)
  157. if (CallInst* CI = dyn_cast<CallInst>(*I++)) {
  158. AllocaInst* bar = new AllocaInst(ArgTy, 0, "vastart.fix.1", CI);
  159. new CallInst(NF, bar, "", CI);
  160. Value* foo = new LoadInst(bar, "vastart.fix.2", CI);
  161. CI->replaceAllUsesWith(foo);
  162. CI->getParent()->getInstList().erase(CI);
  163. }
  164. F->setName("");
  165. }
  166. if(Function* F = M->getNamedFunction("llvm.va_end")) {
  167. assert(F->arg_size() == 1 && "Obsolete va_end takes 1 argument!");
  168. //vaend foo
  169. // ->
  170. //bar = alloca 1 of typeof(foo)
  171. //vaend bar
  172. const Type* RetTy = Type::getPrimitiveType(Type::VoidTyID);
  173. const Type* ArgTy = F->getFunctionType()->getParamType(0);
  174. const Type* ArgTyPtr = PointerType::get(ArgTy);
  175. Function* NF = M->getOrInsertFunction("llvm.va_end",
  176. RetTy, ArgTyPtr, (Type *)0);
  177. for(Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E;)
  178. if (CallInst* CI = dyn_cast<CallInst>(*I++)) {
  179. AllocaInst* bar = new AllocaInst(ArgTy, 0, "vaend.fix.1", CI);
  180. new StoreInst(CI->getOperand(1), bar, CI);
  181. new CallInst(NF, bar, "", CI);
  182. CI->getParent()->getInstList().erase(CI);
  183. }
  184. F->setName("");
  185. }
  186. if(Function* F = M->getNamedFunction("llvm.va_copy")) {
  187. assert(F->arg_size() == 1 && "Obsolete va_copy takes 1 argument!");
  188. //foo = vacopy(bar)
  189. // ->
  190. //a = alloca 1 of typeof(foo)
  191. //b = alloca 1 of typeof(foo)
  192. //store bar -> b
  193. //vacopy(a, b)
  194. //foo = load a
  195. const Type* RetTy = Type::getPrimitiveType(Type::VoidTyID);
  196. const Type* ArgTy = F->getFunctionType()->getReturnType();
  197. const Type* ArgTyPtr = PointerType::get(ArgTy);
  198. Function* NF = M->getOrInsertFunction("llvm.va_copy",
  199. RetTy, ArgTyPtr, ArgTyPtr, (Type *)0);
  200. for(Value::use_iterator I = F->use_begin(), E = F->use_end(); I != E;)
  201. if (CallInst* CI = dyn_cast<CallInst>(*I++)) {
  202. AllocaInst* a = new AllocaInst(ArgTy, 0, "vacopy.fix.1", CI);
  203. AllocaInst* b = new AllocaInst(ArgTy, 0, "vacopy.fix.2", CI);
  204. new StoreInst(CI->getOperand(1), b, CI);
  205. new CallInst(NF, a, b, "", CI);
  206. Value* foo = new LoadInst(a, "vacopy.fix.3", CI);
  207. CI->replaceAllUsesWith(foo);
  208. CI->getParent()->getInstList().erase(CI);
  209. }
  210. F->setName("");
  211. }
  212. return MP;
  213. }
  214. //===----------------------------------------------------------------------===//
  215. // Wrapper functions
  216. //===----------------------------------------------------------------------===//
  217. /// getBytecodeBufferModuleProvider - lazy function-at-a-time loading from a
  218. /// buffer
  219. ModuleProvider*
  220. llvm::getBytecodeBufferModuleProvider(const unsigned char *Buffer,
  221. unsigned Length,
  222. const std::string &ModuleID,
  223. BytecodeHandler* H ) {
  224. return CheckVarargs(
  225. new BytecodeBufferReader(Buffer, Length, ModuleID, H));
  226. }
  227. /// ParseBytecodeBuffer - Parse a given bytecode buffer
  228. ///
  229. Module *llvm::ParseBytecodeBuffer(const unsigned char *Buffer, unsigned Length,
  230. const std::string &ModuleID,
  231. std::string *ErrorStr){
  232. try {
  233. std::auto_ptr<ModuleProvider>
  234. AMP(getBytecodeBufferModuleProvider(Buffer, Length, ModuleID));
  235. return AMP->releaseModule();
  236. } catch (std::string &err) {
  237. if (ErrorStr) *ErrorStr = err;
  238. return 0;
  239. }
  240. }
  241. /// getBytecodeModuleProvider - lazy function-at-a-time loading from a file
  242. ///
  243. ModuleProvider *llvm::getBytecodeModuleProvider(const std::string &Filename,
  244. BytecodeHandler* H) {
  245. if (Filename != std::string("-")) // Read from a file...
  246. return CheckVarargs(new BytecodeFileReader(Filename,H));
  247. else // Read from stdin
  248. return CheckVarargs(new BytecodeStdinReader(H));
  249. }
  250. /// ParseBytecodeFile - Parse the given bytecode file
  251. ///
  252. Module *llvm::ParseBytecodeFile(const std::string &Filename,
  253. std::string *ErrorStr) {
  254. try {
  255. std::auto_ptr<ModuleProvider> AMP(getBytecodeModuleProvider(Filename));
  256. return AMP->releaseModule();
  257. } catch (std::string &err) {
  258. if (ErrorStr) *ErrorStr = err;
  259. return 0;
  260. }
  261. }
  262. // AnalyzeBytecodeFile - analyze one file
  263. Module* llvm::AnalyzeBytecodeFile(
  264. const std::string &Filename, ///< File to analyze
  265. BytecodeAnalysis& bca, ///< Statistical output
  266. std::string *ErrorStr, ///< Error output
  267. std::ostream* output ///< Dump output
  268. )
  269. {
  270. try {
  271. BytecodeHandler* analyzerHandler =createBytecodeAnalyzerHandler(bca,output);
  272. std::auto_ptr<ModuleProvider> AMP(
  273. getBytecodeModuleProvider(Filename,analyzerHandler));
  274. return AMP->releaseModule();
  275. } catch (std::string &err) {
  276. if (ErrorStr) *ErrorStr = err;
  277. return 0;
  278. }
  279. }
  280. // AnalyzeBytecodeBuffer - analyze a buffer
  281. Module* llvm::AnalyzeBytecodeBuffer(
  282. const unsigned char* Buffer, ///< Pointer to start of bytecode buffer
  283. unsigned Length, ///< Size of the bytecode buffer
  284. const std::string& ModuleID, ///< Identifier for the module
  285. BytecodeAnalysis& bca, ///< The results of the analysis
  286. std::string* ErrorStr, ///< Errors, if any.
  287. std::ostream* output ///< Dump output, if any
  288. )
  289. {
  290. try {
  291. BytecodeHandler* hdlr = createBytecodeAnalyzerHandler(bca, output);
  292. std::auto_ptr<ModuleProvider>
  293. AMP(getBytecodeBufferModuleProvider(Buffer, Length, ModuleID, hdlr));
  294. return AMP->releaseModule();
  295. } catch (std::string &err) {
  296. if (ErrorStr) *ErrorStr = err;
  297. return 0;
  298. }
  299. }
  300. bool llvm::GetBytecodeDependentLibraries(const std::string &fname,
  301. Module::LibraryListType& deplibs) {
  302. try {
  303. std::auto_ptr<ModuleProvider> AMP( getBytecodeModuleProvider(fname));
  304. Module* M = AMP->releaseModule();
  305. deplibs = M->getLibraries();
  306. delete M;
  307. return true;
  308. } catch (...) {
  309. deplibs.clear();
  310. return false;
  311. }
  312. }
  313. static void getSymbols(Module*M, std::vector<std::string>& symbols) {
  314. // Loop over global variables
  315. for (Module::global_iterator GI = M->global_begin(), GE=M->global_end(); GI != GE; ++GI)
  316. if (!GI->isExternal() && !GI->hasInternalLinkage())
  317. if (!GI->getName().empty())
  318. symbols.push_back(GI->getName());
  319. // Loop over functions.
  320. for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI)
  321. if (!FI->isExternal() && !FI->hasInternalLinkage())
  322. if (!FI->getName().empty())
  323. symbols.push_back(FI->getName());
  324. }
  325. // Get just the externally visible defined symbols from the bytecode
  326. bool llvm::GetBytecodeSymbols(const sys::Path& fName,
  327. std::vector<std::string>& symbols) {
  328. try {
  329. std::auto_ptr<ModuleProvider> AMP(
  330. getBytecodeModuleProvider(fName.toString()));
  331. // Get the module from the provider
  332. Module* M = AMP->materializeModule();
  333. // Get the symbols
  334. getSymbols(M, symbols);
  335. // Done with the module
  336. return true;
  337. } catch (...) {
  338. return false;
  339. }
  340. }
  341. ModuleProvider*
  342. llvm::GetBytecodeSymbols(const unsigned char*Buffer, unsigned Length,
  343. const std::string& ModuleID,
  344. std::vector<std::string>& symbols) {
  345. ModuleProvider* MP = 0;
  346. try {
  347. // Get the module provider
  348. MP = getBytecodeBufferModuleProvider(Buffer, Length, ModuleID);
  349. // Get the module from the provider
  350. Module* M = MP->materializeModule();
  351. // Get the symbols
  352. getSymbols(M, symbols);
  353. // Done with the module. Note that ModuleProvider will delete the
  354. // Module when it is deleted. Also note that its the caller's responsibility
  355. // to delete the ModuleProvider.
  356. return MP;
  357. } catch (...) {
  358. // We delete only the ModuleProvider here because its destructor will
  359. // also delete the Module (we used materializeModule not releaseModule).
  360. delete MP;
  361. }
  362. return 0;
  363. }