SourceManager.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370
  1. //===--- SourceManager.cpp - Track and cache source files -----------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file was developed by Chris Lattner and is distributed under
  6. // the University of Illinois Open Source License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file implements the SourceManager interface.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "clang/Basic/SourceManager.h"
  14. #include "clang/Basic/FileManager.h"
  15. #include "llvm/Support/MemoryBuffer.h"
  16. #include "llvm/System/Path.h"
  17. #include <algorithm>
  18. #include <iostream>
  19. using namespace clang;
  20. using namespace SrcMgr;
  21. using llvm::MemoryBuffer;
  22. SourceManager::~SourceManager() {
  23. for (std::map<const FileEntry *, FileInfo>::iterator I = FileInfos.begin(),
  24. E = FileInfos.end(); I != E; ++I) {
  25. delete I->second.Buffer;
  26. delete[] I->second.SourceLineCache;
  27. }
  28. for (std::list<InfoRec>::iterator I = MemBufferInfos.begin(),
  29. E = MemBufferInfos.end(); I != E; ++I) {
  30. delete I->second.Buffer;
  31. delete[] I->second.SourceLineCache;
  32. }
  33. }
  34. // FIXME: REMOVE THESE
  35. #include <unistd.h>
  36. #include <sys/types.h>
  37. #include <sys/uio.h>
  38. #include <sys/fcntl.h>
  39. #include <cerrno>
  40. static const MemoryBuffer *ReadFileFast(const FileEntry *FileEnt) {
  41. #if 0
  42. // FIXME: Reintroduce this and zap this function once the common llvm stuff
  43. // is fast for the small case.
  44. return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
  45. FileEnt->getSize());
  46. #endif
  47. // If the file is larger than some threshold, use 'read', otherwise use mmap.
  48. if (FileEnt->getSize() >= 4096*4)
  49. return MemoryBuffer::getFile(FileEnt->getName(), strlen(FileEnt->getName()),
  50. 0, FileEnt->getSize());
  51. MemoryBuffer *SB = MemoryBuffer::getNewUninitMemBuffer(FileEnt->getSize(),
  52. FileEnt->getName());
  53. char *BufPtr = const_cast<char*>(SB->getBufferStart());
  54. int FD = ::open(FileEnt->getName(), O_RDONLY);
  55. if (FD == -1) {
  56. delete SB;
  57. return 0;
  58. }
  59. unsigned BytesLeft = FileEnt->getSize();
  60. while (BytesLeft) {
  61. ssize_t NumRead = ::read(FD, BufPtr, BytesLeft);
  62. if (NumRead != -1) {
  63. BytesLeft -= NumRead;
  64. BufPtr += NumRead;
  65. } else if (errno == EINTR) {
  66. // try again
  67. } else {
  68. // error reading.
  69. close(FD);
  70. delete SB;
  71. return 0;
  72. }
  73. }
  74. close(FD);
  75. return SB;
  76. }
  77. /// getFileInfo - Create or return a cached FileInfo for the specified file.
  78. ///
  79. const InfoRec *
  80. SourceManager::getInfoRec(const FileEntry *FileEnt) {
  81. assert(FileEnt && "Didn't specify a file entry to use?");
  82. // Do we already have information about this file?
  83. std::map<const FileEntry *, FileInfo>::iterator I =
  84. FileInfos.lower_bound(FileEnt);
  85. if (I != FileInfos.end() && I->first == FileEnt)
  86. return &*I;
  87. // Nope, get information.
  88. const MemoryBuffer *File = ReadFileFast(FileEnt);
  89. if (File == 0)
  90. return 0;
  91. const InfoRec &Entry =
  92. *FileInfos.insert(I, std::make_pair(FileEnt, FileInfo()));
  93. FileInfo &Info = const_cast<FileInfo &>(Entry.second);
  94. Info.Buffer = File;
  95. Info.SourceLineCache = 0;
  96. Info.NumLines = 0;
  97. return &Entry;
  98. }
  99. /// createMemBufferInfoRec - Create a new info record for the specified memory
  100. /// buffer. This does no caching.
  101. const InfoRec *
  102. SourceManager::createMemBufferInfoRec(const MemoryBuffer *Buffer) {
  103. // Add a new info record to the MemBufferInfos list and return it.
  104. FileInfo FI;
  105. FI.Buffer = Buffer;
  106. FI.SourceLineCache = 0;
  107. FI.NumLines = 0;
  108. MemBufferInfos.push_back(InfoRec(0, FI));
  109. return &MemBufferInfos.back();
  110. }
  111. /// createFileID - Create a new fileID for the specified InfoRec and include
  112. /// position. This works regardless of whether the InfoRec corresponds to a
  113. /// file or some other input source.
  114. unsigned SourceManager::createFileID(const InfoRec *File,
  115. SourceLocation IncludePos) {
  116. // If FileEnt is really large (e.g. it's a large .i file), we may not be able
  117. // to fit an arbitrary position in the file in the FilePos field. To handle
  118. // this, we create one FileID for each chunk of the file that fits in a
  119. // FilePos field.
  120. unsigned FileSize = File->second.Buffer->getBufferSize();
  121. if (FileSize+1 < (1 << SourceLocation::FilePosBits)) {
  122. FileIDs.push_back(FileIDInfo::getNormalBuffer(IncludePos, 0, File));
  123. assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
  124. "Ran out of file ID's!");
  125. return FileIDs.size();
  126. }
  127. // Create one FileID for each chunk of the file.
  128. unsigned Result = FileIDs.size()+1;
  129. unsigned ChunkNo = 0;
  130. while (1) {
  131. FileIDs.push_back(FileIDInfo::getNormalBuffer(IncludePos, ChunkNo++, File));
  132. if (FileSize+1 < (1 << SourceLocation::FilePosBits)) break;
  133. FileSize -= (1 << SourceLocation::FilePosBits);
  134. }
  135. assert(FileIDs.size() < (1 << SourceLocation::FileIDBits) &&
  136. "Ran out of file ID's!");
  137. return Result;
  138. }
  139. /// getInstantiationLoc - Return a new SourceLocation that encodes the fact
  140. /// that a token from physloc PhysLoc should actually be referenced from
  141. /// InstantiationLoc.
  142. SourceLocation SourceManager::getInstantiationLoc(SourceLocation PhysLoc,
  143. SourceLocation InstantLoc) {
  144. assert(getFIDInfo(PhysLoc.getFileID())->IDType !=
  145. SrcMgr::FileIDInfo::MacroExpansion &&
  146. "Location instantiated in a macro?");
  147. // Resolve InstantLoc down to a real logical location.
  148. InstantLoc = getLogicalLoc(InstantLoc);
  149. unsigned InstantiationFileID;
  150. // If this is the same instantiation as was requested last time, return this
  151. // immediately.
  152. if (PhysLoc.getFileID() == LastInstantiationLoc_MacroFID &&
  153. InstantLoc == LastInstantiationLoc_InstantLoc) {
  154. InstantiationFileID = LastInstantiationLoc_Result;
  155. } else {
  156. // Add a FileID for this. FIXME: should cache these!
  157. FileIDs.push_back(FileIDInfo::getMacroExpansion(InstantLoc,
  158. PhysLoc.getFileID()));
  159. InstantiationFileID = FileIDs.size();
  160. // Remember this in the single-entry cache for next time.
  161. LastInstantiationLoc_MacroFID = PhysLoc.getFileID();
  162. LastInstantiationLoc_InstantLoc = InstantLoc;
  163. LastInstantiationLoc_Result = InstantiationFileID;
  164. }
  165. return SourceLocation(InstantiationFileID, PhysLoc.getRawFilePos());
  166. }
  167. /// getCharacterData - Return a pointer to the start of the specified location
  168. /// in the appropriate MemoryBuffer.
  169. const char *SourceManager::getCharacterData(SourceLocation SL) const {
  170. // Note that this is a hot function in the getSpelling() path, which is
  171. // heavily used by -E mode.
  172. unsigned FileID = SL.getFileID();
  173. assert(FileID && "Invalid source location!");
  174. return getFileInfo(FileID)->Buffer->getBufferStart() + getFilePos(SL);
  175. }
  176. /// getIncludeLoc - Return the location of the #include for the specified
  177. /// FileID.
  178. SourceLocation SourceManager::getIncludeLoc(unsigned FileID) const {
  179. const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(FileID);
  180. // For Macros, the physical loc is specified by the MacroTokenFileID.
  181. if (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion)
  182. FIDInfo = &FileIDs[FIDInfo->u.MacroTokenFileID-1];
  183. return FIDInfo->IncludeLoc;
  184. }
  185. /// getColumnNumber - Return the column # for the specified include position.
  186. /// this is significantly cheaper to compute than the line number. This returns
  187. /// zero if the column number isn't known.
  188. unsigned SourceManager::getColumnNumber(SourceLocation Loc) const {
  189. Loc = getLogicalLoc(Loc);
  190. unsigned FileID = Loc.getFileID();
  191. if (FileID == 0) return 0;
  192. unsigned FilePos = getFilePos(Loc);
  193. const MemoryBuffer *Buffer = getBuffer(FileID);
  194. const char *Buf = Buffer->getBufferStart();
  195. unsigned LineStart = FilePos;
  196. while (LineStart && Buf[LineStart-1] != '\n' && Buf[LineStart-1] != '\r')
  197. --LineStart;
  198. return FilePos-LineStart+1;
  199. }
  200. /// getSourceName - This method returns the name of the file or buffer that
  201. /// the SourceLocation specifies. This can be modified with #line directives,
  202. /// etc.
  203. std::string SourceManager::getSourceName(SourceLocation Loc) {
  204. Loc = getLogicalLoc(Loc);
  205. unsigned FileID = Loc.getFileID();
  206. if (FileID == 0) return "";
  207. return getFileInfo(FileID)->Buffer->getBufferIdentifier();
  208. }
  209. /// getLineNumber - Given a SourceLocation, return the physical line number
  210. /// for the position indicated. This requires building and caching a table of
  211. /// line offsets for the MemoryBuffer, so this is not cheap: use only when
  212. /// about to emit a diagnostic.
  213. unsigned SourceManager::getLineNumber(SourceLocation Loc) {
  214. Loc = getLogicalLoc(Loc);
  215. unsigned FileID = Loc.getFileID();
  216. if (FileID == 0) return 0;
  217. FileInfo *FileInfo = getFileInfo(FileID);
  218. // If this is the first use of line information for this buffer, compute the
  219. /// SourceLineCache for it on demand.
  220. if (FileInfo->SourceLineCache == 0) {
  221. const MemoryBuffer *Buffer = FileInfo->Buffer;
  222. // Find the file offsets of all of the *physical* source lines. This does
  223. // not look at trigraphs, escaped newlines, or anything else tricky.
  224. std::vector<unsigned> LineOffsets;
  225. // Line #1 starts at char 0.
  226. LineOffsets.push_back(0);
  227. const unsigned char *Buf = (const unsigned char *)Buffer->getBufferStart();
  228. const unsigned char *End = (const unsigned char *)Buffer->getBufferEnd();
  229. unsigned Offs = 0;
  230. while (1) {
  231. // Skip over the contents of the line.
  232. // TODO: Vectorize this? This is very performance sensitive for programs
  233. // with lots of diagnostics and in -E mode.
  234. const unsigned char *NextBuf = (const unsigned char *)Buf;
  235. while (*NextBuf != '\n' && *NextBuf != '\r' && *NextBuf != '\0')
  236. ++NextBuf;
  237. Offs += NextBuf-Buf;
  238. Buf = NextBuf;
  239. if (Buf[0] == '\n' || Buf[0] == '\r') {
  240. // If this is \n\r or \r\n, skip both characters.
  241. if ((Buf[1] == '\n' || Buf[1] == '\r') && Buf[0] != Buf[1])
  242. ++Offs, ++Buf;
  243. ++Offs, ++Buf;
  244. LineOffsets.push_back(Offs);
  245. } else {
  246. // Otherwise, this is a null. If end of file, exit.
  247. if (Buf == End) break;
  248. // Otherwise, skip the null.
  249. ++Offs, ++Buf;
  250. }
  251. }
  252. LineOffsets.push_back(Offs);
  253. // Copy the offsets into the FileInfo structure.
  254. FileInfo->NumLines = LineOffsets.size();
  255. FileInfo->SourceLineCache = new unsigned[LineOffsets.size()];
  256. std::copy(LineOffsets.begin(), LineOffsets.end(),
  257. FileInfo->SourceLineCache);
  258. }
  259. // Okay, we know we have a line number table. Do a binary search to find the
  260. // line number that this character position lands on.
  261. unsigned NumLines = FileInfo->NumLines;
  262. unsigned *SourceLineCache = FileInfo->SourceLineCache;
  263. // TODO: If this is performance sensitive, we could try doing simple radix
  264. // type approaches to make good (tight?) initial guesses based on the
  265. // assumption that all lines are the same average size.
  266. unsigned *Pos = std::lower_bound(SourceLineCache, SourceLineCache+NumLines,
  267. getFilePos(Loc)+1);
  268. return Pos-SourceLineCache;
  269. }
  270. /// getSourceFilePos - This method returns the *logical* offset from the start
  271. /// of the file that the specified SourceLocation represents. This returns
  272. /// the location of the *logical* character data, not the physical file
  273. /// position. In the case of macros, for example, this returns where the
  274. /// macro was instantiated, not where the characters for the macro can be
  275. /// found.
  276. unsigned SourceManager::getSourceFilePos(SourceLocation Loc) const {
  277. // If this is a macro, we need to get the instantiation location.
  278. const SrcMgr::FileIDInfo *FIDInfo = getFIDInfo(Loc.getFileID());
  279. while (FIDInfo->IDType == SrcMgr::FileIDInfo::MacroExpansion) {
  280. Loc = FIDInfo->IncludeLoc;
  281. FIDInfo = getFIDInfo(Loc.getFileID());
  282. }
  283. return getFilePos(Loc);
  284. }
  285. /// PrintStats - Print statistics to stderr.
  286. ///
  287. void SourceManager::PrintStats() const {
  288. std::cerr << "\n*** Source Manager Stats:\n";
  289. std::cerr << FileInfos.size() << " files mapped, " << MemBufferInfos.size()
  290. << " mem buffers mapped, " << FileIDs.size()
  291. << " file ID's allocated.\n";
  292. unsigned NumBuffers = 0, NumMacros = 0;
  293. for (unsigned i = 0, e = FileIDs.size(); i != e; ++i) {
  294. if (FileIDs[i].IDType == FileIDInfo::NormalBuffer)
  295. ++NumBuffers;
  296. else if (FileIDs[i].IDType == FileIDInfo::MacroExpansion)
  297. ++NumMacros;
  298. else
  299. assert(0 && "Unknown FileID!");
  300. }
  301. std::cerr << " " << NumBuffers << " normal buffer FileID's, "
  302. << NumMacros << " macro expansion FileID's.\n";
  303. unsigned NumLineNumsComputed = 0;
  304. unsigned NumFileBytesMapped = 0;
  305. for (std::map<const FileEntry *, FileInfo>::const_iterator I =
  306. FileInfos.begin(), E = FileInfos.end(); I != E; ++I) {
  307. NumLineNumsComputed += I->second.SourceLineCache != 0;
  308. NumFileBytesMapped += I->second.Buffer->getBufferSize();
  309. }
  310. std::cerr << NumFileBytesMapped << " bytes of files mapped, "
  311. << NumLineNumsComputed << " files with line #'s computed.\n";
  312. }