TokenAnalyzer.cpp 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154
  1. //===--- TokenAnalyzer.cpp - Analyze Token Streams --------------*- C++ -*-===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. ///
  10. /// \file
  11. /// \brief This file implements an abstract TokenAnalyzer and associated helper
  12. /// classes. TokenAnalyzer can be extended to generate replacements based on
  13. /// an annotated and pre-processed token stream.
  14. ///
  15. //===----------------------------------------------------------------------===//
  16. #include "TokenAnalyzer.h"
  17. #include "AffectedRangeManager.h"
  18. #include "Encoding.h"
  19. #include "FormatToken.h"
  20. #include "FormatTokenLexer.h"
  21. #include "TokenAnnotator.h"
  22. #include "UnwrappedLineParser.h"
  23. #include "clang/Basic/Diagnostic.h"
  24. #include "clang/Basic/DiagnosticOptions.h"
  25. #include "clang/Basic/FileManager.h"
  26. #include "clang/Basic/SourceManager.h"
  27. #include "clang/Format/Format.h"
  28. #include "llvm/ADT/STLExtras.h"
  29. #include "llvm/Support/Debug.h"
  30. #define DEBUG_TYPE "format-formatter"
  31. namespace clang {
  32. namespace format {
  33. // This sets up an virtual file system with file \p FileName containing \p
  34. // Code.
  35. std::unique_ptr<Environment>
  36. Environment::CreateVirtualEnvironment(StringRef Code, StringRef FileName,
  37. ArrayRef<tooling::Range> Ranges,
  38. unsigned FirstStartColumn,
  39. unsigned NextStartColumn,
  40. unsigned LastStartColumn) {
  41. // This is referenced by `FileMgr` and will be released by `FileMgr` when it
  42. // is deleted.
  43. IntrusiveRefCntPtr<vfs::InMemoryFileSystem> InMemoryFileSystem(
  44. new vfs::InMemoryFileSystem);
  45. // This is passed to `SM` as reference, so the pointer has to be referenced
  46. // in `Environment` so that `FileMgr` can out-live this function scope.
  47. std::unique_ptr<FileManager> FileMgr(
  48. new FileManager(FileSystemOptions(), InMemoryFileSystem));
  49. // This is passed to `SM` as reference, so the pointer has to be referenced
  50. // by `Environment` due to the same reason above.
  51. std::unique_ptr<DiagnosticsEngine> Diagnostics(new DiagnosticsEngine(
  52. IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs),
  53. new DiagnosticOptions));
  54. // This will be stored as reference, so the pointer has to be stored in
  55. // due to the same reason above.
  56. std::unique_ptr<SourceManager> VirtualSM(
  57. new SourceManager(*Diagnostics, *FileMgr));
  58. InMemoryFileSystem->addFile(
  59. FileName, 0,
  60. llvm::MemoryBuffer::getMemBuffer(Code, FileName,
  61. /*RequiresNullTerminator=*/false));
  62. FileID ID = VirtualSM->createFileID(FileMgr->getFile(FileName),
  63. SourceLocation(), clang::SrcMgr::C_User);
  64. assert(ID.isValid());
  65. SourceLocation StartOfFile = VirtualSM->getLocForStartOfFile(ID);
  66. std::vector<CharSourceRange> CharRanges;
  67. for (const tooling::Range &Range : Ranges) {
  68. SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
  69. SourceLocation End = Start.getLocWithOffset(Range.getLength());
  70. CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
  71. }
  72. return llvm::make_unique<Environment>(
  73. ID, std::move(FileMgr), std::move(VirtualSM), std::move(Diagnostics),
  74. CharRanges, FirstStartColumn, NextStartColumn, LastStartColumn);
  75. }
  76. TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style)
  77. : Style(Style), Env(Env),
  78. AffectedRangeMgr(Env.getSourceManager(), Env.getCharRanges()),
  79. UnwrappedLines(1),
  80. Encoding(encoding::detectEncoding(
  81. Env.getSourceManager().getBufferData(Env.getFileID()))) {
  82. DEBUG(
  83. llvm::dbgs() << "File encoding: "
  84. << (Encoding == encoding::Encoding_UTF8 ? "UTF8" : "unknown")
  85. << "\n");
  86. DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
  87. << "\n");
  88. }
  89. std::pair<tooling::Replacements, unsigned> TokenAnalyzer::process() {
  90. tooling::Replacements Result;
  91. FormatTokenLexer Tokens(Env.getSourceManager(), Env.getFileID(),
  92. Env.getFirstStartColumn(), Style, Encoding);
  93. UnwrappedLineParser Parser(Style, Tokens.getKeywords(),
  94. Env.getFirstStartColumn(), Tokens.lex(), *this);
  95. Parser.parse();
  96. assert(UnwrappedLines.rbegin()->empty());
  97. unsigned Penalty = 0;
  98. for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; ++Run) {
  99. DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
  100. SmallVector<AnnotatedLine *, 16> AnnotatedLines;
  101. TokenAnnotator Annotator(Style, Tokens.getKeywords());
  102. for (unsigned i = 0, e = UnwrappedLines[Run].size(); i != e; ++i) {
  103. AnnotatedLines.push_back(new AnnotatedLine(UnwrappedLines[Run][i]));
  104. Annotator.annotate(*AnnotatedLines.back());
  105. }
  106. std::pair<tooling::Replacements, unsigned> RunResult =
  107. analyze(Annotator, AnnotatedLines, Tokens);
  108. DEBUG({
  109. llvm::dbgs() << "Replacements for run " << Run << ":\n";
  110. for (tooling::Replacements::const_iterator I = RunResult.first.begin(),
  111. E = RunResult.first.end();
  112. I != E; ++I) {
  113. llvm::dbgs() << I->toString() << "\n";
  114. }
  115. });
  116. for (unsigned i = 0, e = AnnotatedLines.size(); i != e; ++i) {
  117. delete AnnotatedLines[i];
  118. }
  119. Penalty += RunResult.second;
  120. for (const auto &R : RunResult.first) {
  121. auto Err = Result.add(R);
  122. // FIXME: better error handling here. For now, simply return an empty
  123. // Replacements to indicate failure.
  124. if (Err) {
  125. llvm::errs() << llvm::toString(std::move(Err)) << "\n";
  126. return {tooling::Replacements(), 0};
  127. }
  128. }
  129. }
  130. return {Result, Penalty};
  131. }
  132. void TokenAnalyzer::consumeUnwrappedLine(const UnwrappedLine &TheLine) {
  133. assert(!UnwrappedLines.empty());
  134. UnwrappedLines.back().push_back(TheLine);
  135. }
  136. void TokenAnalyzer::finishRun() {
  137. UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
  138. }
  139. } // end namespace format
  140. } // end namespace clang