BuildTree.cpp 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  1. //===- BuildTree.cpp ------------------------------------------*- C++ -*-=====//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "clang/Tooling/Syntax/BuildTree.h"
  9. #include "clang/AST/RecursiveASTVisitor.h"
  10. #include "clang/AST/Stmt.h"
  11. #include "clang/Basic/LLVM.h"
  12. #include "clang/Basic/SourceLocation.h"
  13. #include "clang/Basic/SourceManager.h"
  14. #include "clang/Basic/TokenKinds.h"
  15. #include "clang/Lex/Lexer.h"
  16. #include "clang/Tooling/Syntax/Nodes.h"
  17. #include "clang/Tooling/Syntax/Tokens.h"
  18. #include "clang/Tooling/Syntax/Tree.h"
  19. #include "llvm/ADT/ArrayRef.h"
  20. #include "llvm/ADT/STLExtras.h"
  21. #include "llvm/ADT/SmallVector.h"
  22. #include "llvm/Support/Allocator.h"
  23. #include "llvm/Support/Casting.h"
  24. #include "llvm/Support/FormatVariadic.h"
  25. #include "llvm/Support/raw_ostream.h"
  26. #include <map>
  27. using namespace clang;
  28. /// A helper class for constructing the syntax tree while traversing a clang
  29. /// AST.
  30. ///
  31. /// At each point of the traversal we maintain a list of pending nodes.
  32. /// Initially all tokens are added as pending nodes. When processing a clang AST
  33. /// node, the clients need to:
  34. /// - create a corresponding syntax node,
  35. /// - assign roles to all pending child nodes with 'markChild' and
  36. /// 'markChildToken',
  37. /// - replace the child nodes with the new syntax node in the pending list
  38. /// with 'foldNode'.
  39. ///
  40. /// Note that all children are expected to be processed when building a node.
  41. ///
  42. /// Call finalize() to finish building the tree and consume the root node.
  43. class syntax::TreeBuilder {
  44. public:
  45. TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) {}
  46. llvm::BumpPtrAllocator &allocator() { return Arena.allocator(); }
  47. /// Populate children for \p New node, assuming it covers tokens from \p
  48. /// Range.
  49. void foldNode(llvm::ArrayRef<syntax::Token> Range, syntax::Tree *New);
  50. /// Set role for a token starting at \p Loc.
  51. void markChildToken(SourceLocation Loc, tok::TokenKind Kind, NodeRole R);
  52. /// Finish building the tree and consume the root node.
  53. syntax::TranslationUnit *finalize() && {
  54. auto Tokens = Arena.tokenBuffer().expandedTokens();
  55. assert(!Tokens.empty());
  56. assert(Tokens.back().kind() == tok::eof);
  57. // Build the root of the tree, consuming all the children.
  58. Pending.foldChildren(Tokens.drop_back(),
  59. new (Arena.allocator()) syntax::TranslationUnit);
  60. return cast<syntax::TranslationUnit>(std::move(Pending).finalize());
  61. }
  62. /// getRange() finds the syntax tokens corresponding to the passed source
  63. /// locations.
  64. /// \p First is the start position of the first token and \p Last is the start
  65. /// position of the last token.
  66. llvm::ArrayRef<syntax::Token> getRange(SourceLocation First,
  67. SourceLocation Last) const {
  68. assert(First.isValid());
  69. assert(Last.isValid());
  70. assert(First == Last ||
  71. Arena.sourceManager().isBeforeInTranslationUnit(First, Last));
  72. return llvm::makeArrayRef(findToken(First), std::next(findToken(Last)));
  73. }
  74. llvm::ArrayRef<syntax::Token> getRange(const Decl *D) const {
  75. return getRange(D->getBeginLoc(), D->getEndLoc());
  76. }
  77. llvm::ArrayRef<syntax::Token> getRange(const Stmt *S) const {
  78. return getRange(S->getBeginLoc(), S->getEndLoc());
  79. }
  80. private:
  81. /// Finds a token starting at \p L. The token must exist.
  82. const syntax::Token *findToken(SourceLocation L) const;
  83. /// A collection of trees covering the input tokens.
  84. /// When created, each tree corresponds to a single token in the file.
  85. /// Clients call 'foldChildren' to attach one or more subtrees to a parent
  86. /// node and update the list of trees accordingly.
  87. ///
  88. /// Ensures that added nodes properly nest and cover the whole token stream.
  89. struct Forest {
  90. Forest(syntax::Arena &A) {
  91. assert(!A.tokenBuffer().expandedTokens().empty());
  92. assert(A.tokenBuffer().expandedTokens().back().kind() == tok::eof);
  93. // Create all leaf nodes.
  94. // Note that we do not have 'eof' in the tree.
  95. for (auto &T : A.tokenBuffer().expandedTokens().drop_back())
  96. Trees.insert(Trees.end(),
  97. {&T, NodeAndRole{new (A.allocator()) syntax::Leaf(&T)}});
  98. }
  99. void assignRole(llvm::ArrayRef<syntax::Token> Range,
  100. syntax::NodeRole Role) {
  101. assert(!Range.empty());
  102. auto It = Trees.lower_bound(Range.begin());
  103. assert(It != Trees.end() && "no node found");
  104. assert(It->first == Range.begin() && "no child with the specified range");
  105. assert((std::next(It) == Trees.end() ||
  106. std::next(It)->first == Range.end()) &&
  107. "no child with the specified range");
  108. It->second.Role = Role;
  109. }
  110. /// Add \p Node to the forest and fill its children nodes based on the \p
  111. /// NodeRange.
  112. void foldChildren(llvm::ArrayRef<syntax::Token> NodeTokens,
  113. syntax::Tree *Node) {
  114. assert(!NodeTokens.empty());
  115. assert(Node->firstChild() == nullptr && "node already has children");
  116. auto *FirstToken = NodeTokens.begin();
  117. auto BeginChildren = Trees.lower_bound(FirstToken);
  118. assert(BeginChildren != Trees.end() &&
  119. BeginChildren->first == FirstToken &&
  120. "fold crosses boundaries of existing subtrees");
  121. auto EndChildren = Trees.lower_bound(NodeTokens.end());
  122. assert((EndChildren == Trees.end() ||
  123. EndChildren->first == NodeTokens.end()) &&
  124. "fold crosses boundaries of existing subtrees");
  125. // (!) we need to go in reverse order, because we can only prepend.
  126. for (auto It = EndChildren; It != BeginChildren; --It)
  127. Node->prependChildLowLevel(std::prev(It)->second.Node,
  128. std::prev(It)->second.Role);
  129. Trees.erase(BeginChildren, EndChildren);
  130. Trees.insert({FirstToken, NodeAndRole(Node)});
  131. }
  132. // EXPECTS: all tokens were consumed and are owned by a single root node.
  133. syntax::Node *finalize() && {
  134. assert(Trees.size() == 1);
  135. auto *Root = Trees.begin()->second.Node;
  136. Trees = {};
  137. return Root;
  138. }
  139. std::string str(const syntax::Arena &A) const {
  140. std::string R;
  141. for (auto It = Trees.begin(); It != Trees.end(); ++It) {
  142. unsigned CoveredTokens =
  143. It != Trees.end()
  144. ? (std::next(It)->first - It->first)
  145. : A.tokenBuffer().expandedTokens().end() - It->first;
  146. R += llvm::formatv("- '{0}' covers '{1}'+{2} tokens\n",
  147. It->second.Node->kind(),
  148. It->first->text(A.sourceManager()), CoveredTokens);
  149. R += It->second.Node->dump(A);
  150. }
  151. return R;
  152. }
  153. private:
  154. /// A with a role that should be assigned to it when adding to a parent.
  155. struct NodeAndRole {
  156. explicit NodeAndRole(syntax::Node *Node)
  157. : Node(Node), Role(NodeRole::Unknown) {}
  158. syntax::Node *Node;
  159. NodeRole Role;
  160. };
  161. /// Maps from the start token to a subtree starting at that token.
  162. /// FIXME: storing the end tokens is redundant.
  163. /// FIXME: the key of a map is redundant, it is also stored in NodeForRange.
  164. std::map<const syntax::Token *, NodeAndRole> Trees;
  165. };
  166. /// For debugging purposes.
  167. std::string str() { return Pending.str(Arena); }
  168. syntax::Arena &Arena;
  169. Forest Pending;
  170. };
  171. namespace {
  172. class BuildTreeVisitor : public RecursiveASTVisitor<BuildTreeVisitor> {
  173. public:
  174. explicit BuildTreeVisitor(ASTContext &Ctx, syntax::TreeBuilder &Builder)
  175. : Builder(Builder), LangOpts(Ctx.getLangOpts()) {}
  176. bool shouldTraversePostOrder() const { return true; }
  177. bool TraverseDecl(Decl *D) {
  178. if (!D || isa<TranslationUnitDecl>(D))
  179. return RecursiveASTVisitor::TraverseDecl(D);
  180. if (!llvm::isa<TranslationUnitDecl>(D->getDeclContext()))
  181. return true; // Only build top-level decls for now, do not recurse.
  182. return RecursiveASTVisitor::TraverseDecl(D);
  183. }
  184. bool VisitDecl(Decl *D) {
  185. assert(llvm::isa<TranslationUnitDecl>(D->getDeclContext()) &&
  186. "expected a top-level decl");
  187. assert(!D->isImplicit());
  188. Builder.foldNode(Builder.getRange(D),
  189. new (allocator()) syntax::TopLevelDeclaration());
  190. return true;
  191. }
  192. bool WalkUpFromTranslationUnitDecl(TranslationUnitDecl *TU) {
  193. // (!) we do not want to call VisitDecl(), the declaration for translation
  194. // unit is built by finalize().
  195. return true;
  196. }
  197. bool WalkUpFromCompoundStmt(CompoundStmt *S) {
  198. using NodeRole = syntax::NodeRole;
  199. Builder.markChildToken(S->getLBracLoc(), tok::l_brace,
  200. NodeRole::CompoundStatement_lbrace);
  201. Builder.markChildToken(S->getRBracLoc(), tok::r_brace,
  202. NodeRole::CompoundStatement_rbrace);
  203. Builder.foldNode(Builder.getRange(S),
  204. new (allocator()) syntax::CompoundStatement);
  205. return true;
  206. }
  207. private:
  208. /// A small helper to save some typing.
  209. llvm::BumpPtrAllocator &allocator() { return Builder.allocator(); }
  210. syntax::TreeBuilder &Builder;
  211. const LangOptions &LangOpts;
  212. };
  213. } // namespace
  214. void syntax::TreeBuilder::foldNode(llvm::ArrayRef<syntax::Token> Range,
  215. syntax::Tree *New) {
  216. Pending.foldChildren(Range, New);
  217. }
  218. void syntax::TreeBuilder::markChildToken(SourceLocation Loc,
  219. tok::TokenKind Kind, NodeRole Role) {
  220. if (Loc.isInvalid())
  221. return;
  222. Pending.assignRole(*findToken(Loc), Role);
  223. }
  224. const syntax::Token *syntax::TreeBuilder::findToken(SourceLocation L) const {
  225. auto Tokens = Arena.tokenBuffer().expandedTokens();
  226. auto &SM = Arena.sourceManager();
  227. auto It = llvm::partition_point(Tokens, [&](const syntax::Token &T) {
  228. return SM.isBeforeInTranslationUnit(T.location(), L);
  229. });
  230. assert(It != Tokens.end());
  231. assert(It->location() == L);
  232. return &*It;
  233. }
  234. syntax::TranslationUnit *
  235. syntax::buildSyntaxTree(Arena &A, const TranslationUnitDecl &TU) {
  236. TreeBuilder Builder(A);
  237. BuildTreeVisitor(TU.getASTContext(), Builder).TraverseAST(TU.getASTContext());
  238. return std::move(Builder).finalize();
  239. }