SourceMgr.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520
  1. //===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file implements the SourceMgr class. This class is used as a simple
  11. // substrate for diagnostics, #include handling, and other low level things for
  12. // simple parsers.
  13. //
  14. //===----------------------------------------------------------------------===//
  15. #include "llvm/Support/SourceMgr.h"
  16. #include "llvm/ADT/ArrayRef.h"
  17. #include "llvm/ADT/STLExtras.h"
  18. #include "llvm/ADT/SmallVector.h"
  19. #include "llvm/ADT/StringRef.h"
  20. #include "llvm/ADT/Twine.h"
  21. #include "llvm/Support/ErrorOr.h"
  22. #include "llvm/Support/Locale.h"
  23. #include "llvm/Support/MemoryBuffer.h"
  24. #include "llvm/Support/Path.h"
  25. #include "llvm/Support/SMLoc.h"
  26. #include "llvm/Support/raw_ostream.h"
  27. #include <algorithm>
  28. #include <cassert>
  29. #include <cstddef>
  30. #include <limits>
  31. #include <memory>
  32. #include <string>
  33. #include <utility>
  34. using namespace llvm;
  35. static const size_t TabStop = 8;
  36. unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
  37. SMLoc IncludeLoc,
  38. std::string &IncludedFile) {
  39. IncludedFile = Filename;
  40. ErrorOr<std::unique_ptr<MemoryBuffer>> NewBufOrErr =
  41. MemoryBuffer::getFile(IncludedFile);
  42. // If the file didn't exist directly, see if it's in an include path.
  43. for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBufOrErr;
  44. ++i) {
  45. IncludedFile =
  46. IncludeDirectories[i] + sys::path::get_separator().data() + Filename;
  47. NewBufOrErr = MemoryBuffer::getFile(IncludedFile);
  48. }
  49. if (!NewBufOrErr)
  50. return 0;
  51. return AddNewSourceBuffer(std::move(*NewBufOrErr), IncludeLoc);
  52. }
  53. unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc) const {
  54. for (unsigned i = 0, e = Buffers.size(); i != e; ++i)
  55. if (Loc.getPointer() >= Buffers[i].Buffer->getBufferStart() &&
  56. // Use <= here so that a pointer to the null at the end of the buffer
  57. // is included as part of the buffer.
  58. Loc.getPointer() <= Buffers[i].Buffer->getBufferEnd())
  59. return i + 1;
  60. return 0;
  61. }
  62. template <typename T>
  63. unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr) const {
  64. // Ensure OffsetCache is allocated and populated with offsets of all the
  65. // '\n' bytes.
  66. std::vector<T> *Offsets = nullptr;
  67. if (OffsetCache.isNull()) {
  68. Offsets = new std::vector<T>();
  69. OffsetCache = Offsets;
  70. size_t Sz = Buffer->getBufferSize();
  71. assert(Sz <= std::numeric_limits<T>::max());
  72. StringRef S = Buffer->getBuffer();
  73. for (size_t N = 0; N < Sz; ++N) {
  74. if (S[N] == '\n') {
  75. Offsets->push_back(static_cast<T>(N));
  76. }
  77. }
  78. } else {
  79. Offsets = OffsetCache.get<std::vector<T> *>();
  80. }
  81. const char *BufStart = Buffer->getBufferStart();
  82. assert(Ptr >= BufStart && Ptr <= Buffer->getBufferEnd());
  83. ptrdiff_t PtrDiff = Ptr - BufStart;
  84. assert(PtrDiff >= 0 && static_cast<size_t>(PtrDiff) <= std::numeric_limits<T>::max());
  85. T PtrOffset = static_cast<T>(PtrDiff);
  86. // std::lower_bound returns the first EOL offset that's not-less-than
  87. // PtrOffset, meaning the EOL that _ends the line_ that PtrOffset is on
  88. // (including if PtrOffset refers to the EOL itself). If there's no such
  89. // EOL, returns end().
  90. auto EOL = std::lower_bound(Offsets->begin(), Offsets->end(), PtrOffset);
  91. // Lines count from 1, so add 1 to the distance from the 0th line.
  92. return (1 + (EOL - Offsets->begin()));
  93. }
  94. SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&Other)
  95. : Buffer(std::move(Other.Buffer)),
  96. OffsetCache(Other.OffsetCache),
  97. IncludeLoc(Other.IncludeLoc) {
  98. Other.OffsetCache = nullptr;
  99. }
  100. SourceMgr::SrcBuffer::~SrcBuffer() {
  101. if (!OffsetCache.isNull()) {
  102. if (OffsetCache.is<std::vector<uint8_t>*>())
  103. delete OffsetCache.get<std::vector<uint8_t>*>();
  104. else if (OffsetCache.is<std::vector<uint16_t>*>())
  105. delete OffsetCache.get<std::vector<uint16_t>*>();
  106. else if (OffsetCache.is<std::vector<uint32_t>*>())
  107. delete OffsetCache.get<std::vector<uint32_t>*>();
  108. else
  109. delete OffsetCache.get<std::vector<uint64_t>*>();
  110. OffsetCache = nullptr;
  111. }
  112. }
  113. std::pair<unsigned, unsigned>
  114. SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const {
  115. if (!BufferID)
  116. BufferID = FindBufferContainingLoc(Loc);
  117. assert(BufferID && "Invalid Location!");
  118. auto &SB = getBufferInfo(BufferID);
  119. const char *Ptr = Loc.getPointer();
  120. size_t Sz = SB.Buffer->getBufferSize();
  121. assert(Sz <= std::numeric_limits<uint64_t>::max());
  122. unsigned LineNo;
  123. if (Sz <= std::numeric_limits<uint8_t>::max())
  124. LineNo = SB.getLineNumber<uint8_t>(Ptr);
  125. else if (Sz <= std::numeric_limits<uint16_t>::max())
  126. LineNo = SB.getLineNumber<uint16_t>(Ptr);
  127. else if (Sz <= std::numeric_limits<uint32_t>::max())
  128. LineNo = SB.getLineNumber<uint32_t>(Ptr);
  129. else
  130. LineNo = SB.getLineNumber<uint64_t>(Ptr);
  131. const char *BufStart = SB.Buffer->getBufferStart();
  132. size_t NewlineOffs = StringRef(BufStart, Ptr-BufStart).find_last_of("\n\r");
  133. if (NewlineOffs == StringRef::npos) NewlineOffs = ~(size_t)0;
  134. return std::make_pair(LineNo, Ptr-BufStart-NewlineOffs);
  135. }
  136. void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const {
  137. if (IncludeLoc == SMLoc()) return; // Top of stack.
  138. unsigned CurBuf = FindBufferContainingLoc(IncludeLoc);
  139. assert(CurBuf && "Invalid or unspecified location!");
  140. PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
  141. OS << "Included from "
  142. << getBufferInfo(CurBuf).Buffer->getBufferIdentifier()
  143. << ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n";
  144. }
  145. SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
  146. const Twine &Msg,
  147. ArrayRef<SMRange> Ranges,
  148. ArrayRef<SMFixIt> FixIts) const {
  149. // First thing to do: find the current buffer containing the specified
  150. // location to pull out the source line.
  151. SmallVector<std::pair<unsigned, unsigned>, 4> ColRanges;
  152. std::pair<unsigned, unsigned> LineAndCol;
  153. StringRef BufferID = "<unknown>";
  154. std::string LineStr;
  155. if (Loc.isValid()) {
  156. unsigned CurBuf = FindBufferContainingLoc(Loc);
  157. assert(CurBuf && "Invalid or unspecified location!");
  158. const MemoryBuffer *CurMB = getMemoryBuffer(CurBuf);
  159. BufferID = CurMB->getBufferIdentifier();
  160. // Scan backward to find the start of the line.
  161. const char *LineStart = Loc.getPointer();
  162. const char *BufStart = CurMB->getBufferStart();
  163. while (LineStart != BufStart && LineStart[-1] != '\n' &&
  164. LineStart[-1] != '\r')
  165. --LineStart;
  166. // Get the end of the line.
  167. const char *LineEnd = Loc.getPointer();
  168. const char *BufEnd = CurMB->getBufferEnd();
  169. while (LineEnd != BufEnd && LineEnd[0] != '\n' && LineEnd[0] != '\r')
  170. ++LineEnd;
  171. LineStr = std::string(LineStart, LineEnd);
  172. // Convert any ranges to column ranges that only intersect the line of the
  173. // location.
  174. for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
  175. SMRange R = Ranges[i];
  176. if (!R.isValid()) continue;
  177. // If the line doesn't contain any part of the range, then ignore it.
  178. if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
  179. continue;
  180. // Ignore pieces of the range that go onto other lines.
  181. if (R.Start.getPointer() < LineStart)
  182. R.Start = SMLoc::getFromPointer(LineStart);
  183. if (R.End.getPointer() > LineEnd)
  184. R.End = SMLoc::getFromPointer(LineEnd);
  185. // Translate from SMLoc ranges to column ranges.
  186. // FIXME: Handle multibyte characters.
  187. ColRanges.push_back(std::make_pair(R.Start.getPointer()-LineStart,
  188. R.End.getPointer()-LineStart));
  189. }
  190. LineAndCol = getLineAndColumn(Loc, CurBuf);
  191. }
  192. return SMDiagnostic(*this, Loc, BufferID, LineAndCol.first,
  193. LineAndCol.second-1, Kind, Msg.str(),
  194. LineStr, ColRanges, FixIts);
  195. }
  196. void SourceMgr::PrintMessage(raw_ostream &OS, const SMDiagnostic &Diagnostic,
  197. bool ShowColors) const {
  198. // Report the message with the diagnostic handler if present.
  199. if (DiagHandler) {
  200. DiagHandler(Diagnostic, DiagContext);
  201. return;
  202. }
  203. if (Diagnostic.getLoc().isValid()) {
  204. unsigned CurBuf = FindBufferContainingLoc(Diagnostic.getLoc());
  205. assert(CurBuf && "Invalid or unspecified location!");
  206. PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
  207. }
  208. Diagnostic.print(nullptr, OS, ShowColors);
  209. }
  210. void SourceMgr::PrintMessage(raw_ostream &OS, SMLoc Loc,
  211. SourceMgr::DiagKind Kind,
  212. const Twine &Msg, ArrayRef<SMRange> Ranges,
  213. ArrayRef<SMFixIt> FixIts, bool ShowColors) const {
  214. PrintMessage(OS, GetMessage(Loc, Kind, Msg, Ranges, FixIts), ShowColors);
  215. }
  216. void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
  217. const Twine &Msg, ArrayRef<SMRange> Ranges,
  218. ArrayRef<SMFixIt> FixIts, bool ShowColors) const {
  219. PrintMessage(errs(), Loc, Kind, Msg, Ranges, FixIts, ShowColors);
  220. }
  221. //===----------------------------------------------------------------------===//
  222. // SMDiagnostic Implementation
  223. //===----------------------------------------------------------------------===//
  224. SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN,
  225. int Line, int Col, SourceMgr::DiagKind Kind,
  226. StringRef Msg, StringRef LineStr,
  227. ArrayRef<std::pair<unsigned,unsigned>> Ranges,
  228. ArrayRef<SMFixIt> Hints)
  229. : SM(&sm), Loc(L), Filename(FN), LineNo(Line), ColumnNo(Col), Kind(Kind),
  230. Message(Msg), LineContents(LineStr), Ranges(Ranges.vec()),
  231. FixIts(Hints.begin(), Hints.end()) {
  232. llvm::sort(FixIts.begin(), FixIts.end());
  233. }
  234. static void buildFixItLine(std::string &CaretLine, std::string &FixItLine,
  235. ArrayRef<SMFixIt> FixIts, ArrayRef<char> SourceLine){
  236. if (FixIts.empty())
  237. return;
  238. const char *LineStart = SourceLine.begin();
  239. const char *LineEnd = SourceLine.end();
  240. size_t PrevHintEndCol = 0;
  241. for (ArrayRef<SMFixIt>::iterator I = FixIts.begin(), E = FixIts.end();
  242. I != E; ++I) {
  243. // If the fixit contains a newline or tab, ignore it.
  244. if (I->getText().find_first_of("\n\r\t") != StringRef::npos)
  245. continue;
  246. SMRange R = I->getRange();
  247. // If the line doesn't contain any part of the range, then ignore it.
  248. if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
  249. continue;
  250. // Translate from SMLoc to column.
  251. // Ignore pieces of the range that go onto other lines.
  252. // FIXME: Handle multibyte characters in the source line.
  253. unsigned FirstCol;
  254. if (R.Start.getPointer() < LineStart)
  255. FirstCol = 0;
  256. else
  257. FirstCol = R.Start.getPointer() - LineStart;
  258. // If we inserted a long previous hint, push this one forwards, and add
  259. // an extra space to show that this is not part of the previous
  260. // completion. This is sort of the best we can do when two hints appear
  261. // to overlap.
  262. //
  263. // Note that if this hint is located immediately after the previous
  264. // hint, no space will be added, since the location is more important.
  265. unsigned HintCol = FirstCol;
  266. if (HintCol < PrevHintEndCol)
  267. HintCol = PrevHintEndCol + 1;
  268. // FIXME: This assertion is intended to catch unintended use of multibyte
  269. // characters in fixits. If we decide to do this, we'll have to track
  270. // separate byte widths for the source and fixit lines.
  271. assert((size_t)sys::locale::columnWidth(I->getText()) ==
  272. I->getText().size());
  273. // This relies on one byte per column in our fixit hints.
  274. unsigned LastColumnModified = HintCol + I->getText().size();
  275. if (LastColumnModified > FixItLine.size())
  276. FixItLine.resize(LastColumnModified, ' ');
  277. std::copy(I->getText().begin(), I->getText().end(),
  278. FixItLine.begin() + HintCol);
  279. PrevHintEndCol = LastColumnModified;
  280. // For replacements, mark the removal range with '~'.
  281. // FIXME: Handle multibyte characters in the source line.
  282. unsigned LastCol;
  283. if (R.End.getPointer() >= LineEnd)
  284. LastCol = LineEnd - LineStart;
  285. else
  286. LastCol = R.End.getPointer() - LineStart;
  287. std::fill(&CaretLine[FirstCol], &CaretLine[LastCol], '~');
  288. }
  289. }
  290. static void printSourceLine(raw_ostream &S, StringRef LineContents) {
  291. // Print out the source line one character at a time, so we can expand tabs.
  292. for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) {
  293. if (LineContents[i] != '\t') {
  294. S << LineContents[i];
  295. ++OutCol;
  296. continue;
  297. }
  298. // If we have a tab, emit at least one space, then round up to 8 columns.
  299. do {
  300. S << ' ';
  301. ++OutCol;
  302. } while ((OutCol % TabStop) != 0);
  303. }
  304. S << '\n';
  305. }
  306. static bool isNonASCII(char c) {
  307. return c & 0x80;
  308. }
  309. void SMDiagnostic::print(const char *ProgName, raw_ostream &S, bool ShowColors,
  310. bool ShowKindLabel) const {
  311. // Display colors only if OS supports colors.
  312. ShowColors &= S.has_colors();
  313. if (ShowColors)
  314. S.changeColor(raw_ostream::SAVEDCOLOR, true);
  315. if (ProgName && ProgName[0])
  316. S << ProgName << ": ";
  317. if (!Filename.empty()) {
  318. if (Filename == "-")
  319. S << "<stdin>";
  320. else
  321. S << Filename;
  322. if (LineNo != -1) {
  323. S << ':' << LineNo;
  324. if (ColumnNo != -1)
  325. S << ':' << (ColumnNo+1);
  326. }
  327. S << ": ";
  328. }
  329. if (ShowKindLabel) {
  330. switch (Kind) {
  331. case SourceMgr::DK_Error:
  332. if (ShowColors)
  333. S.changeColor(raw_ostream::RED, true);
  334. S << "error: ";
  335. break;
  336. case SourceMgr::DK_Warning:
  337. if (ShowColors)
  338. S.changeColor(raw_ostream::MAGENTA, true);
  339. S << "warning: ";
  340. break;
  341. case SourceMgr::DK_Note:
  342. if (ShowColors)
  343. S.changeColor(raw_ostream::BLACK, true);
  344. S << "note: ";
  345. break;
  346. case SourceMgr::DK_Remark:
  347. if (ShowColors)
  348. S.changeColor(raw_ostream::BLUE, true);
  349. S << "remark: ";
  350. break;
  351. }
  352. if (ShowColors) {
  353. S.resetColor();
  354. S.changeColor(raw_ostream::SAVEDCOLOR, true);
  355. }
  356. }
  357. S << Message << '\n';
  358. if (ShowColors)
  359. S.resetColor();
  360. if (LineNo == -1 || ColumnNo == -1)
  361. return;
  362. // FIXME: If there are multibyte or multi-column characters in the source, all
  363. // our ranges will be wrong. To do this properly, we'll need a byte-to-column
  364. // map like Clang's TextDiagnostic. For now, we'll just handle tabs by
  365. // expanding them later, and bail out rather than show incorrect ranges and
  366. // misaligned fixits for any other odd characters.
  367. if (find_if(LineContents, isNonASCII) != LineContents.end()) {
  368. printSourceLine(S, LineContents);
  369. return;
  370. }
  371. size_t NumColumns = LineContents.size();
  372. // Build the line with the caret and ranges.
  373. std::string CaretLine(NumColumns+1, ' ');
  374. // Expand any ranges.
  375. for (unsigned r = 0, e = Ranges.size(); r != e; ++r) {
  376. std::pair<unsigned, unsigned> R = Ranges[r];
  377. std::fill(&CaretLine[R.first],
  378. &CaretLine[std::min((size_t)R.second, CaretLine.size())],
  379. '~');
  380. }
  381. // Add any fix-its.
  382. // FIXME: Find the beginning of the line properly for multibyte characters.
  383. std::string FixItInsertionLine;
  384. buildFixItLine(CaretLine, FixItInsertionLine, FixIts,
  385. makeArrayRef(Loc.getPointer() - ColumnNo,
  386. LineContents.size()));
  387. // Finally, plop on the caret.
  388. if (unsigned(ColumnNo) <= NumColumns)
  389. CaretLine[ColumnNo] = '^';
  390. else
  391. CaretLine[NumColumns] = '^';
  392. // ... and remove trailing whitespace so the output doesn't wrap for it. We
  393. // know that the line isn't completely empty because it has the caret in it at
  394. // least.
  395. CaretLine.erase(CaretLine.find_last_not_of(' ')+1);
  396. printSourceLine(S, LineContents);
  397. if (ShowColors)
  398. S.changeColor(raw_ostream::GREEN, true);
  399. // Print out the caret line, matching tabs in the source line.
  400. for (unsigned i = 0, e = CaretLine.size(), OutCol = 0; i != e; ++i) {
  401. if (i >= LineContents.size() || LineContents[i] != '\t') {
  402. S << CaretLine[i];
  403. ++OutCol;
  404. continue;
  405. }
  406. // Okay, we have a tab. Insert the appropriate number of characters.
  407. do {
  408. S << CaretLine[i];
  409. ++OutCol;
  410. } while ((OutCol % TabStop) != 0);
  411. }
  412. S << '\n';
  413. if (ShowColors)
  414. S.resetColor();
  415. // Print out the replacement line, matching tabs in the source line.
  416. if (FixItInsertionLine.empty())
  417. return;
  418. for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i < e; ++i) {
  419. if (i >= LineContents.size() || LineContents[i] != '\t') {
  420. S << FixItInsertionLine[i];
  421. ++OutCol;
  422. continue;
  423. }
  424. // Okay, we have a tab. Insert the appropriate number of characters.
  425. do {
  426. S << FixItInsertionLine[i];
  427. // FIXME: This is trying not to break up replacements, but then to re-sync
  428. // with the tabs between replacements. This will fail, though, if two
  429. // fix-it replacements are exactly adjacent, or if a fix-it contains a
  430. // space. Really we should be precomputing column widths, which we'll
  431. // need anyway for multibyte chars.
  432. if (FixItInsertionLine[i] != ' ')
  433. ++i;
  434. ++OutCol;
  435. } while (((OutCol % TabStop) != 0) && i != e);
  436. }
  437. S << '\n';
  438. }