SourceMgr.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501
  1. //===- SourceMgr.cpp - Manager for Simple Source Buffers & Diagnostics ----===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. //
  9. // This file implements the SourceMgr class. This class is used as a simple
  10. // substrate for diagnostics, #include handling, and other low level things for
  11. // simple parsers.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "llvm/Support/SourceMgr.h"
  15. #include "llvm/ADT/ArrayRef.h"
  16. #include "llvm/ADT/STLExtras.h"
  17. #include "llvm/ADT/SmallVector.h"
  18. #include "llvm/ADT/StringRef.h"
  19. #include "llvm/ADT/Twine.h"
  20. #include "llvm/Support/ErrorOr.h"
  21. #include "llvm/Support/Locale.h"
  22. #include "llvm/Support/MemoryBuffer.h"
  23. #include "llvm/Support/Path.h"
  24. #include "llvm/Support/SMLoc.h"
  25. #include "llvm/Support/WithColor.h"
  26. #include "llvm/Support/raw_ostream.h"
  27. #include <algorithm>
  28. #include <cassert>
  29. #include <cstddef>
  30. #include <limits>
  31. #include <memory>
  32. #include <string>
  33. #include <utility>
  34. using namespace llvm;
  /// Distance, in columns, between tab stops. Used when expanding '\t'
  /// characters while printing source, caret and fix-it lines.
  static constexpr size_t TabStop = 8;
  36. unsigned SourceMgr::AddIncludeFile(const std::string &Filename,
  37. SMLoc IncludeLoc,
  38. std::string &IncludedFile) {
  39. IncludedFile = Filename;
  40. ErrorOr<std::unique_ptr<MemoryBuffer>> NewBufOrErr =
  41. MemoryBuffer::getFile(IncludedFile);
  42. // If the file didn't exist directly, see if it's in an include path.
  43. for (unsigned i = 0, e = IncludeDirectories.size(); i != e && !NewBufOrErr;
  44. ++i) {
  45. IncludedFile =
  46. IncludeDirectories[i] + sys::path::get_separator().data() + Filename;
  47. NewBufOrErr = MemoryBuffer::getFile(IncludedFile);
  48. }
  49. if (!NewBufOrErr)
  50. return 0;
  51. return AddNewSourceBuffer(std::move(*NewBufOrErr), IncludeLoc);
  52. }
  53. unsigned SourceMgr::FindBufferContainingLoc(SMLoc Loc) const {
  54. for (unsigned i = 0, e = Buffers.size(); i != e; ++i)
  55. if (Loc.getPointer() >= Buffers[i].Buffer->getBufferStart() &&
  56. // Use <= here so that a pointer to the null at the end of the buffer
  57. // is included as part of the buffer.
  58. Loc.getPointer() <= Buffers[i].Buffer->getBufferEnd())
  59. return i + 1;
  60. return 0;
  61. }
  62. template <typename T>
  63. unsigned SourceMgr::SrcBuffer::getLineNumber(const char *Ptr) const {
  64. // Ensure OffsetCache is allocated and populated with offsets of all the
  65. // '\n' bytes.
  66. std::vector<T> *Offsets = nullptr;
  67. if (OffsetCache.isNull()) {
  68. Offsets = new std::vector<T>();
  69. OffsetCache = Offsets;
  70. size_t Sz = Buffer->getBufferSize();
  71. assert(Sz <= std::numeric_limits<T>::max());
  72. StringRef S = Buffer->getBuffer();
  73. for (size_t N = 0; N < Sz; ++N) {
  74. if (S[N] == '\n') {
  75. Offsets->push_back(static_cast<T>(N));
  76. }
  77. }
  78. } else {
  79. Offsets = OffsetCache.get<std::vector<T> *>();
  80. }
  81. const char *BufStart = Buffer->getBufferStart();
  82. assert(Ptr >= BufStart && Ptr <= Buffer->getBufferEnd());
  83. ptrdiff_t PtrDiff = Ptr - BufStart;
  84. assert(PtrDiff >= 0 && static_cast<size_t>(PtrDiff) <= std::numeric_limits<T>::max());
  85. T PtrOffset = static_cast<T>(PtrDiff);
  86. // llvm::lower_bound gives the number of EOL before PtrOffset. Add 1 to get
  87. // the line number.
  88. return llvm::lower_bound(*Offsets, PtrOffset) - Offsets->begin() + 1;
  89. }
  90. SourceMgr::SrcBuffer::SrcBuffer(SourceMgr::SrcBuffer &&Other)
  91. : Buffer(std::move(Other.Buffer)),
  92. OffsetCache(Other.OffsetCache),
  93. IncludeLoc(Other.IncludeLoc) {
  94. Other.OffsetCache = nullptr;
  95. }
  96. SourceMgr::SrcBuffer::~SrcBuffer() {
  97. if (!OffsetCache.isNull()) {
  98. if (OffsetCache.is<std::vector<uint8_t>*>())
  99. delete OffsetCache.get<std::vector<uint8_t>*>();
  100. else if (OffsetCache.is<std::vector<uint16_t>*>())
  101. delete OffsetCache.get<std::vector<uint16_t>*>();
  102. else if (OffsetCache.is<std::vector<uint32_t>*>())
  103. delete OffsetCache.get<std::vector<uint32_t>*>();
  104. else
  105. delete OffsetCache.get<std::vector<uint64_t>*>();
  106. OffsetCache = nullptr;
  107. }
  108. }
  109. std::pair<unsigned, unsigned>
  110. SourceMgr::getLineAndColumn(SMLoc Loc, unsigned BufferID) const {
  111. if (!BufferID)
  112. BufferID = FindBufferContainingLoc(Loc);
  113. assert(BufferID && "Invalid Location!");
  114. auto &SB = getBufferInfo(BufferID);
  115. const char *Ptr = Loc.getPointer();
  116. size_t Sz = SB.Buffer->getBufferSize();
  117. unsigned LineNo;
  118. if (Sz <= std::numeric_limits<uint8_t>::max())
  119. LineNo = SB.getLineNumber<uint8_t>(Ptr);
  120. else if (Sz <= std::numeric_limits<uint16_t>::max())
  121. LineNo = SB.getLineNumber<uint16_t>(Ptr);
  122. else if (Sz <= std::numeric_limits<uint32_t>::max())
  123. LineNo = SB.getLineNumber<uint32_t>(Ptr);
  124. else
  125. LineNo = SB.getLineNumber<uint64_t>(Ptr);
  126. const char *BufStart = SB.Buffer->getBufferStart();
  127. size_t NewlineOffs = StringRef(BufStart, Ptr-BufStart).find_last_of("\n\r");
  128. if (NewlineOffs == StringRef::npos) NewlineOffs = ~(size_t)0;
  129. return std::make_pair(LineNo, Ptr-BufStart-NewlineOffs);
  130. }
  131. void SourceMgr::PrintIncludeStack(SMLoc IncludeLoc, raw_ostream &OS) const {
  132. if (IncludeLoc == SMLoc()) return; // Top of stack.
  133. unsigned CurBuf = FindBufferContainingLoc(IncludeLoc);
  134. assert(CurBuf && "Invalid or unspecified location!");
  135. PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
  136. OS << "Included from "
  137. << getBufferInfo(CurBuf).Buffer->getBufferIdentifier()
  138. << ":" << FindLineNumber(IncludeLoc, CurBuf) << ":\n";
  139. }
  140. SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
  141. const Twine &Msg,
  142. ArrayRef<SMRange> Ranges,
  143. ArrayRef<SMFixIt> FixIts) const {
  144. // First thing to do: find the current buffer containing the specified
  145. // location to pull out the source line.
  146. SmallVector<std::pair<unsigned, unsigned>, 4> ColRanges;
  147. std::pair<unsigned, unsigned> LineAndCol;
  148. StringRef BufferID = "<unknown>";
  149. std::string LineStr;
  150. if (Loc.isValid()) {
  151. unsigned CurBuf = FindBufferContainingLoc(Loc);
  152. assert(CurBuf && "Invalid or unspecified location!");
  153. const MemoryBuffer *CurMB = getMemoryBuffer(CurBuf);
  154. BufferID = CurMB->getBufferIdentifier();
  155. // Scan backward to find the start of the line.
  156. const char *LineStart = Loc.getPointer();
  157. const char *BufStart = CurMB->getBufferStart();
  158. while (LineStart != BufStart && LineStart[-1] != '\n' &&
  159. LineStart[-1] != '\r')
  160. --LineStart;
  161. // Get the end of the line.
  162. const char *LineEnd = Loc.getPointer();
  163. const char *BufEnd = CurMB->getBufferEnd();
  164. while (LineEnd != BufEnd && LineEnd[0] != '\n' && LineEnd[0] != '\r')
  165. ++LineEnd;
  166. LineStr = std::string(LineStart, LineEnd);
  167. // Convert any ranges to column ranges that only intersect the line of the
  168. // location.
  169. for (unsigned i = 0, e = Ranges.size(); i != e; ++i) {
  170. SMRange R = Ranges[i];
  171. if (!R.isValid()) continue;
  172. // If the line doesn't contain any part of the range, then ignore it.
  173. if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
  174. continue;
  175. // Ignore pieces of the range that go onto other lines.
  176. if (R.Start.getPointer() < LineStart)
  177. R.Start = SMLoc::getFromPointer(LineStart);
  178. if (R.End.getPointer() > LineEnd)
  179. R.End = SMLoc::getFromPointer(LineEnd);
  180. // Translate from SMLoc ranges to column ranges.
  181. // FIXME: Handle multibyte characters.
  182. ColRanges.push_back(std::make_pair(R.Start.getPointer()-LineStart,
  183. R.End.getPointer()-LineStart));
  184. }
  185. LineAndCol = getLineAndColumn(Loc, CurBuf);
  186. }
  187. return SMDiagnostic(*this, Loc, BufferID, LineAndCol.first,
  188. LineAndCol.second-1, Kind, Msg.str(),
  189. LineStr, ColRanges, FixIts);
  190. }
  191. void SourceMgr::PrintMessage(raw_ostream &OS, const SMDiagnostic &Diagnostic,
  192. bool ShowColors) const {
  193. // Report the message with the diagnostic handler if present.
  194. if (DiagHandler) {
  195. DiagHandler(Diagnostic, DiagContext);
  196. return;
  197. }
  198. if (Diagnostic.getLoc().isValid()) {
  199. unsigned CurBuf = FindBufferContainingLoc(Diagnostic.getLoc());
  200. assert(CurBuf && "Invalid or unspecified location!");
  201. PrintIncludeStack(getBufferInfo(CurBuf).IncludeLoc, OS);
  202. }
  203. Diagnostic.print(nullptr, OS, ShowColors);
  204. }
  205. void SourceMgr::PrintMessage(raw_ostream &OS, SMLoc Loc,
  206. SourceMgr::DiagKind Kind,
  207. const Twine &Msg, ArrayRef<SMRange> Ranges,
  208. ArrayRef<SMFixIt> FixIts, bool ShowColors) const {
  209. PrintMessage(OS, GetMessage(Loc, Kind, Msg, Ranges, FixIts), ShowColors);
  210. }
  211. void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind,
  212. const Twine &Msg, ArrayRef<SMRange> Ranges,
  213. ArrayRef<SMFixIt> FixIts, bool ShowColors) const {
  214. PrintMessage(errs(), Loc, Kind, Msg, Ranges, FixIts, ShowColors);
  215. }
  216. //===----------------------------------------------------------------------===//
  217. // SMDiagnostic Implementation
  218. //===----------------------------------------------------------------------===//
  219. SMDiagnostic::SMDiagnostic(const SourceMgr &sm, SMLoc L, StringRef FN,
  220. int Line, int Col, SourceMgr::DiagKind Kind,
  221. StringRef Msg, StringRef LineStr,
  222. ArrayRef<std::pair<unsigned,unsigned>> Ranges,
  223. ArrayRef<SMFixIt> Hints)
  224. : SM(&sm), Loc(L), Filename(FN), LineNo(Line), ColumnNo(Col), Kind(Kind),
  225. Message(Msg), LineContents(LineStr), Ranges(Ranges.vec()),
  226. FixIts(Hints.begin(), Hints.end()) {
  227. llvm::sort(FixIts);
  228. }
  229. static void buildFixItLine(std::string &CaretLine, std::string &FixItLine,
  230. ArrayRef<SMFixIt> FixIts, ArrayRef<char> SourceLine){
  231. if (FixIts.empty())
  232. return;
  233. const char *LineStart = SourceLine.begin();
  234. const char *LineEnd = SourceLine.end();
  235. size_t PrevHintEndCol = 0;
  236. for (ArrayRef<SMFixIt>::iterator I = FixIts.begin(), E = FixIts.end();
  237. I != E; ++I) {
  238. // If the fixit contains a newline or tab, ignore it.
  239. if (I->getText().find_first_of("\n\r\t") != StringRef::npos)
  240. continue;
  241. SMRange R = I->getRange();
  242. // If the line doesn't contain any part of the range, then ignore it.
  243. if (R.Start.getPointer() > LineEnd || R.End.getPointer() < LineStart)
  244. continue;
  245. // Translate from SMLoc to column.
  246. // Ignore pieces of the range that go onto other lines.
  247. // FIXME: Handle multibyte characters in the source line.
  248. unsigned FirstCol;
  249. if (R.Start.getPointer() < LineStart)
  250. FirstCol = 0;
  251. else
  252. FirstCol = R.Start.getPointer() - LineStart;
  253. // If we inserted a long previous hint, push this one forwards, and add
  254. // an extra space to show that this is not part of the previous
  255. // completion. This is sort of the best we can do when two hints appear
  256. // to overlap.
  257. //
  258. // Note that if this hint is located immediately after the previous
  259. // hint, no space will be added, since the location is more important.
  260. unsigned HintCol = FirstCol;
  261. if (HintCol < PrevHintEndCol)
  262. HintCol = PrevHintEndCol + 1;
  263. // FIXME: This assertion is intended to catch unintended use of multibyte
  264. // characters in fixits. If we decide to do this, we'll have to track
  265. // separate byte widths for the source and fixit lines.
  266. assert((size_t)sys::locale::columnWidth(I->getText()) ==
  267. I->getText().size());
  268. // This relies on one byte per column in our fixit hints.
  269. unsigned LastColumnModified = HintCol + I->getText().size();
  270. if (LastColumnModified > FixItLine.size())
  271. FixItLine.resize(LastColumnModified, ' ');
  272. std::copy(I->getText().begin(), I->getText().end(),
  273. FixItLine.begin() + HintCol);
  274. PrevHintEndCol = LastColumnModified;
  275. // For replacements, mark the removal range with '~'.
  276. // FIXME: Handle multibyte characters in the source line.
  277. unsigned LastCol;
  278. if (R.End.getPointer() >= LineEnd)
  279. LastCol = LineEnd - LineStart;
  280. else
  281. LastCol = R.End.getPointer() - LineStart;
  282. std::fill(&CaretLine[FirstCol], &CaretLine[LastCol], '~');
  283. }
  284. }
  285. static void printSourceLine(raw_ostream &S, StringRef LineContents) {
  286. // Print out the source line one character at a time, so we can expand tabs.
  287. for (unsigned i = 0, e = LineContents.size(), OutCol = 0; i != e; ++i) {
  288. size_t NextTab = LineContents.find('\t', i);
  289. // If there were no tabs left, print the rest, we are done.
  290. if (NextTab == StringRef::npos) {
  291. S << LineContents.drop_front(i);
  292. break;
  293. }
  294. // Otherwise, print from i to NextTab.
  295. S << LineContents.slice(i, NextTab);
  296. OutCol += NextTab - i;
  297. i = NextTab;
  298. // If we have a tab, emit at least one space, then round up to 8 columns.
  299. do {
  300. S << ' ';
  301. ++OutCol;
  302. } while ((OutCol % TabStop) != 0);
  303. }
  304. S << '\n';
  305. }
  /// Return true if the high bit of \p c is set, i.e. the byte is outside the
  /// 7-bit ASCII range.
  static bool isNonASCII(char c) {
    const unsigned char Byte = static_cast<unsigned char>(c);
    return (Byte & 0x80u) != 0;
  }
  309. void SMDiagnostic::print(const char *ProgName, raw_ostream &OS,
  310. bool ShowColors, bool ShowKindLabel) const {
  311. {
  312. WithColor S(OS, raw_ostream::SAVEDCOLOR, true, false, !ShowColors);
  313. if (ProgName && ProgName[0])
  314. S << ProgName << ": ";
  315. if (!Filename.empty()) {
  316. if (Filename == "-")
  317. S << "<stdin>";
  318. else
  319. S << Filename;
  320. if (LineNo != -1) {
  321. S << ':' << LineNo;
  322. if (ColumnNo != -1)
  323. S << ':' << (ColumnNo + 1);
  324. }
  325. S << ": ";
  326. }
  327. }
  328. if (ShowKindLabel) {
  329. switch (Kind) {
  330. case SourceMgr::DK_Error:
  331. WithColor::error(OS, "", !ShowColors);
  332. break;
  333. case SourceMgr::DK_Warning:
  334. WithColor::warning(OS, "", !ShowColors);
  335. break;
  336. case SourceMgr::DK_Note:
  337. WithColor::note(OS, "", !ShowColors);
  338. break;
  339. case SourceMgr::DK_Remark:
  340. WithColor::remark(OS, "", !ShowColors);
  341. break;
  342. }
  343. }
  344. WithColor(OS, raw_ostream::SAVEDCOLOR, true, false, !ShowColors)
  345. << Message << '\n';
  346. if (LineNo == -1 || ColumnNo == -1)
  347. return;
  348. // FIXME: If there are multibyte or multi-column characters in the source, all
  349. // our ranges will be wrong. To do this properly, we'll need a byte-to-column
  350. // map like Clang's TextDiagnostic. For now, we'll just handle tabs by
  351. // expanding them later, and bail out rather than show incorrect ranges and
  352. // misaligned fixits for any other odd characters.
  353. if (find_if(LineContents, isNonASCII) != LineContents.end()) {
  354. printSourceLine(OS, LineContents);
  355. return;
  356. }
  357. size_t NumColumns = LineContents.size();
  358. // Build the line with the caret and ranges.
  359. std::string CaretLine(NumColumns+1, ' ');
  360. // Expand any ranges.
  361. for (unsigned r = 0, e = Ranges.size(); r != e; ++r) {
  362. std::pair<unsigned, unsigned> R = Ranges[r];
  363. std::fill(&CaretLine[R.first],
  364. &CaretLine[std::min((size_t)R.second, CaretLine.size())],
  365. '~');
  366. }
  367. // Add any fix-its.
  368. // FIXME: Find the beginning of the line properly for multibyte characters.
  369. std::string FixItInsertionLine;
  370. buildFixItLine(CaretLine, FixItInsertionLine, FixIts,
  371. makeArrayRef(Loc.getPointer() - ColumnNo,
  372. LineContents.size()));
  373. // Finally, plop on the caret.
  374. if (unsigned(ColumnNo) <= NumColumns)
  375. CaretLine[ColumnNo] = '^';
  376. else
  377. CaretLine[NumColumns] = '^';
  378. // ... and remove trailing whitespace so the output doesn't wrap for it. We
  379. // know that the line isn't completely empty because it has the caret in it at
  380. // least.
  381. CaretLine.erase(CaretLine.find_last_not_of(' ')+1);
  382. printSourceLine(OS, LineContents);
  383. {
  384. WithColor S(OS, raw_ostream::GREEN, true, false, !ShowColors);
  385. // Print out the caret line, matching tabs in the source line.
  386. for (unsigned i = 0, e = CaretLine.size(), OutCol = 0; i != e; ++i) {
  387. if (i >= LineContents.size() || LineContents[i] != '\t') {
  388. S << CaretLine[i];
  389. ++OutCol;
  390. continue;
  391. }
  392. // Okay, we have a tab. Insert the appropriate number of characters.
  393. do {
  394. S << CaretLine[i];
  395. ++OutCol;
  396. } while ((OutCol % TabStop) != 0);
  397. }
  398. S << '\n';
  399. }
  400. // Print out the replacement line, matching tabs in the source line.
  401. if (FixItInsertionLine.empty())
  402. return;
  403. for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i < e; ++i) {
  404. if (i >= LineContents.size() || LineContents[i] != '\t') {
  405. OS << FixItInsertionLine[i];
  406. ++OutCol;
  407. continue;
  408. }
  409. // Okay, we have a tab. Insert the appropriate number of characters.
  410. do {
  411. OS << FixItInsertionLine[i];
  412. // FIXME: This is trying not to break up replacements, but then to re-sync
  413. // with the tabs between replacements. This will fail, though, if two
  414. // fix-it replacements are exactly adjacent, or if a fix-it contains a
  415. // space. Really we should be precomputing column widths, which we'll
  416. // need anyway for multibyte chars.
  417. if (FixItInsertionLine[i] != ' ')
  418. ++i;
  419. ++OutCol;
  420. } while (((OutCol % TabStop) != 0) && i != e);
  421. }
  422. OS << '\n';
  423. }