HeaderIncludes.cpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330
  1. //===--- HeaderIncludes.cpp - Insert/Delete #includes --*- C++ -*----------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. #include "clang/Tooling/Core/HeaderIncludes.h"
  10. #include "clang/Basic/SourceManager.h"
  11. #include "clang/Lex/Lexer.h"
  12. namespace clang {
  13. namespace tooling {
  14. namespace {
  15. LangOptions createLangOpts() {
  16. LangOptions LangOpts;
  17. LangOpts.CPlusPlus = 1;
  18. LangOpts.CPlusPlus11 = 1;
  19. LangOpts.CPlusPlus14 = 1;
  20. LangOpts.LineComment = 1;
  21. LangOpts.CXXOperatorNames = 1;
  22. LangOpts.Bool = 1;
  23. LangOpts.ObjC1 = 1;
  24. LangOpts.ObjC2 = 1;
  25. LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally.
  26. LangOpts.DeclSpecKeyword = 1; // To get __declspec.
  27. LangOpts.WChar = 1; // To get wchar_t
  28. return LangOpts;
  29. }
  30. // Returns the offset after skipping a sequence of tokens, matched by \p
  31. // GetOffsetAfterSequence, from the start of the code.
  32. // \p GetOffsetAfterSequence should be a function that matches a sequence of
  33. // tokens and returns an offset after the sequence.
  34. unsigned getOffsetAfterTokenSequence(
  35. StringRef FileName, StringRef Code, const IncludeStyle &Style,
  36. llvm::function_ref<unsigned(const SourceManager &, Lexer &, Token &)>
  37. GetOffsetAfterSequence) {
  38. SourceManagerForFile VirtualSM(FileName, Code);
  39. SourceManager &SM = VirtualSM.get();
  40. Lexer Lex(SM.getMainFileID(), SM.getBuffer(SM.getMainFileID()), SM,
  41. createLangOpts());
  42. Token Tok;
  43. // Get the first token.
  44. Lex.LexFromRawLexer(Tok);
  45. return GetOffsetAfterSequence(SM, Lex, Tok);
  46. }
  47. // Check if a sequence of tokens is like "#<Name> <raw_identifier>". If it is,
  48. // \p Tok will be the token after this directive; otherwise, it can be any token
  49. // after the given \p Tok (including \p Tok).
  50. bool checkAndConsumeDirectiveWithName(Lexer &Lex, StringRef Name, Token &Tok) {
  51. bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
  52. Tok.is(tok::raw_identifier) &&
  53. Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) &&
  54. Tok.is(tok::raw_identifier);
  55. if (Matched)
  56. Lex.LexFromRawLexer(Tok);
  57. return Matched;
  58. }
  59. void skipComments(Lexer &Lex, Token &Tok) {
  60. while (Tok.is(tok::comment))
  61. if (Lex.LexFromRawLexer(Tok))
  62. return;
  63. }
  64. // Returns the offset after header guard directives and any comments
  65. // before/after header guards. If no header guard presents in the code, this
  66. // will returns the offset after skipping all comments from the start of the
  67. // code.
  68. unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
  69. StringRef Code,
  70. const IncludeStyle &Style) {
  71. return getOffsetAfterTokenSequence(
  72. FileName, Code, Style,
  73. [](const SourceManager &SM, Lexer &Lex, Token Tok) {
  74. skipComments(Lex, Tok);
  75. unsigned InitialOffset = SM.getFileOffset(Tok.getLocation());
  76. if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
  77. skipComments(Lex, Tok);
  78. if (checkAndConsumeDirectiveWithName(Lex, "define", Tok))
  79. return SM.getFileOffset(Tok.getLocation());
  80. }
  81. return InitialOffset;
  82. });
  83. }
  84. // Check if a sequence of tokens is like
  85. // "#include ("header.h" | <header.h>)".
  86. // If it is, \p Tok will be the token after this directive; otherwise, it can be
  87. // any token after the given \p Tok (including \p Tok).
  88. bool checkAndConsumeInclusiveDirective(Lexer &Lex, Token &Tok) {
  89. auto Matched = [&]() {
  90. Lex.LexFromRawLexer(Tok);
  91. return true;
  92. };
  93. if (Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
  94. Tok.is(tok::raw_identifier) && Tok.getRawIdentifier() == "include") {
  95. if (Lex.LexFromRawLexer(Tok))
  96. return false;
  97. if (Tok.is(tok::string_literal))
  98. return Matched();
  99. if (Tok.is(tok::less)) {
  100. while (!Lex.LexFromRawLexer(Tok) && Tok.isNot(tok::greater)) {
  101. }
  102. if (Tok.is(tok::greater))
  103. return Matched();
  104. }
  105. }
  106. return false;
  107. }
  108. // Returns the offset of the last #include directive after which a new
  109. // #include can be inserted. This ignores #include's after the #include block(s)
  110. // in the beginning of a file to avoid inserting headers into code sections
  111. // where new #include's should not be added by default.
  112. // These code sections include:
  113. // - raw string literals (containing #include).
  114. // - #if blocks.
  115. // - Special #include's among declarations (e.g. functions).
  116. //
  117. // If no #include after which a new #include can be inserted, this returns the
  118. // offset after skipping all comments from the start of the code.
  119. // Inserting after an #include is not allowed if it comes after code that is not
  120. // #include (e.g. pre-processing directive that is not #include, declarations).
  121. unsigned getMaxHeaderInsertionOffset(StringRef FileName, StringRef Code,
  122. const IncludeStyle &Style) {
  123. return getOffsetAfterTokenSequence(
  124. FileName, Code, Style,
  125. [](const SourceManager &SM, Lexer &Lex, Token Tok) {
  126. skipComments(Lex, Tok);
  127. unsigned MaxOffset = SM.getFileOffset(Tok.getLocation());
  128. while (checkAndConsumeInclusiveDirective(Lex, Tok))
  129. MaxOffset = SM.getFileOffset(Tok.getLocation());
  130. return MaxOffset;
  131. });
  132. }
  133. inline StringRef trimInclude(StringRef IncludeName) {
  134. return IncludeName.trim("\"<>");
  135. }
  136. const char IncludeRegexPattern[] =
  137. R"(^[\t\ ]*#[\t\ ]*(import|include)[^"<]*(["<][^">]*[">]))";
  138. } // anonymous namespace
  139. IncludeCategoryManager::IncludeCategoryManager(const IncludeStyle &Style,
  140. StringRef FileName)
  141. : Style(Style), FileName(FileName) {
  142. FileStem = llvm::sys::path::stem(FileName);
  143. for (const auto &Category : Style.IncludeCategories)
  144. CategoryRegexs.emplace_back(Category.Regex, llvm::Regex::IgnoreCase);
  145. IsMainFile = FileName.endswith(".c") || FileName.endswith(".cc") ||
  146. FileName.endswith(".cpp") || FileName.endswith(".c++") ||
  147. FileName.endswith(".cxx") || FileName.endswith(".m") ||
  148. FileName.endswith(".mm");
  149. }
  150. int IncludeCategoryManager::getIncludePriority(StringRef IncludeName,
  151. bool CheckMainHeader) const {
  152. int Ret = INT_MAX;
  153. for (unsigned i = 0, e = CategoryRegexs.size(); i != e; ++i)
  154. if (CategoryRegexs[i].match(IncludeName)) {
  155. Ret = Style.IncludeCategories[i].Priority;
  156. break;
  157. }
  158. if (CheckMainHeader && IsMainFile && Ret > 0 && isMainHeader(IncludeName))
  159. Ret = 0;
  160. return Ret;
  161. }
  162. bool IncludeCategoryManager::isMainHeader(StringRef IncludeName) const {
  163. if (!IncludeName.startswith("\""))
  164. return false;
  165. StringRef HeaderStem =
  166. llvm::sys::path::stem(IncludeName.drop_front(1).drop_back(1));
  167. if (FileStem.startswith(HeaderStem) ||
  168. FileStem.startswith_lower(HeaderStem)) {
  169. llvm::Regex MainIncludeRegex((HeaderStem + Style.IncludeIsMainRegex).str(),
  170. llvm::Regex::IgnoreCase);
  171. if (MainIncludeRegex.match(FileStem))
  172. return true;
  173. }
  174. return false;
  175. }
  176. HeaderIncludes::HeaderIncludes(StringRef FileName, StringRef Code,
  177. const IncludeStyle &Style)
  178. : FileName(FileName), Code(Code), FirstIncludeOffset(-1),
  179. MinInsertOffset(
  180. getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style)),
  181. MaxInsertOffset(MinInsertOffset +
  182. getMaxHeaderInsertionOffset(
  183. FileName, Code.drop_front(MinInsertOffset), Style)),
  184. Categories(Style, FileName),
  185. IncludeRegex(llvm::Regex(IncludeRegexPattern)) {
  186. // Add 0 for main header and INT_MAX for headers that are not in any
  187. // category.
  188. Priorities = {0, INT_MAX};
  189. for (const auto &Category : Style.IncludeCategories)
  190. Priorities.insert(Category.Priority);
  191. SmallVector<StringRef, 32> Lines;
  192. Code.drop_front(MinInsertOffset).split(Lines, "\n");
  193. unsigned Offset = MinInsertOffset;
  194. unsigned NextLineOffset;
  195. SmallVector<StringRef, 4> Matches;
  196. for (auto Line : Lines) {
  197. NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1);
  198. if (IncludeRegex.match(Line, &Matches)) {
  199. // If this is the last line without trailing newline, we need to make
  200. // sure we don't delete across the file boundary.
  201. addExistingInclude(
  202. Include(Matches[2],
  203. tooling::Range(
  204. Offset, std::min(Line.size() + 1, Code.size() - Offset))),
  205. NextLineOffset);
  206. }
  207. Offset = NextLineOffset;
  208. }
  209. // Populate CategoryEndOfssets:
  210. // - Ensure that CategoryEndOffset[Highest] is always populated.
  211. // - If CategoryEndOffset[Priority] isn't set, use the next higher value
  212. // that is set, up to CategoryEndOffset[Highest].
  213. auto Highest = Priorities.begin();
  214. if (CategoryEndOffsets.find(*Highest) == CategoryEndOffsets.end()) {
  215. if (FirstIncludeOffset >= 0)
  216. CategoryEndOffsets[*Highest] = FirstIncludeOffset;
  217. else
  218. CategoryEndOffsets[*Highest] = MinInsertOffset;
  219. }
  220. // By this point, CategoryEndOffset[Highest] is always set appropriately:
  221. // - to an appropriate location before/after existing #includes, or
  222. // - to right after the header guard, or
  223. // - to the beginning of the file.
  224. for (auto I = ++Priorities.begin(), E = Priorities.end(); I != E; ++I)
  225. if (CategoryEndOffsets.find(*I) == CategoryEndOffsets.end())
  226. CategoryEndOffsets[*I] = CategoryEndOffsets[*std::prev(I)];
  227. }
  228. // \p Offset: the start of the line following this include directive.
  229. void HeaderIncludes::addExistingInclude(Include IncludeToAdd,
  230. unsigned NextLineOffset) {
  231. auto Iter =
  232. ExistingIncludes.try_emplace(trimInclude(IncludeToAdd.Name)).first;
  233. Iter->second.push_back(std::move(IncludeToAdd));
  234. auto &CurInclude = Iter->second.back();
  235. // The header name with quotes or angle brackets.
  236. // Only record the offset of current #include if we can insert after it.
  237. if (CurInclude.R.getOffset() <= MaxInsertOffset) {
  238. int Priority = Categories.getIncludePriority(
  239. CurInclude.Name, /*CheckMainHeader=*/FirstIncludeOffset < 0);
  240. CategoryEndOffsets[Priority] = NextLineOffset;
  241. IncludesByPriority[Priority].push_back(&CurInclude);
  242. if (FirstIncludeOffset < 0)
  243. FirstIncludeOffset = CurInclude.R.getOffset();
  244. }
  245. }
  246. llvm::Optional<tooling::Replacement>
  247. HeaderIncludes::insert(llvm::StringRef IncludeName, bool IsAngled) const {
  248. assert(IncludeName == trimInclude(IncludeName));
  249. // If a <header> ("header") already exists in code, "header" (<header>) with
  250. // different quotation will still be inserted.
  251. // FIXME: figure out if this is the best behavior.
  252. auto It = ExistingIncludes.find(IncludeName);
  253. if (It != ExistingIncludes.end())
  254. for (const auto &Inc : It->second)
  255. if ((IsAngled && StringRef(Inc.Name).startswith("<")) ||
  256. (!IsAngled && StringRef(Inc.Name).startswith("\"")))
  257. return llvm::None;
  258. std::string Quoted = IsAngled ? ("<" + IncludeName + ">").str()
  259. : ("\"" + IncludeName + "\"").str();
  260. StringRef QuotedName = Quoted;
  261. int Priority = Categories.getIncludePriority(
  262. QuotedName, /*CheckMainHeader=*/FirstIncludeOffset < 0);
  263. auto CatOffset = CategoryEndOffsets.find(Priority);
  264. assert(CatOffset != CategoryEndOffsets.end());
  265. unsigned InsertOffset = CatOffset->second; // Fall back offset
  266. auto Iter = IncludesByPriority.find(Priority);
  267. if (Iter != IncludesByPriority.end()) {
  268. for (const auto *Inc : Iter->second) {
  269. if (QuotedName < Inc->Name) {
  270. InsertOffset = Inc->R.getOffset();
  271. break;
  272. }
  273. }
  274. }
  275. assert(InsertOffset <= Code.size());
  276. std::string NewInclude = ("#include " + QuotedName + "\n").str();
  277. // When inserting headers at end of the code, also append '\n' to the code
  278. // if it does not end with '\n'.
  279. // FIXME: when inserting multiple #includes at the end of code, only one
  280. // newline should be added.
  281. if (InsertOffset == Code.size() && (!Code.empty() && Code.back() != '\n'))
  282. NewInclude = "\n" + NewInclude;
  283. return tooling::Replacement(FileName, InsertOffset, 0, NewInclude);
  284. }
  285. tooling::Replacements HeaderIncludes::remove(llvm::StringRef IncludeName,
  286. bool IsAngled) const {
  287. assert(IncludeName == trimInclude(IncludeName));
  288. tooling::Replacements Result;
  289. auto Iter = ExistingIncludes.find(IncludeName);
  290. if (Iter == ExistingIncludes.end())
  291. return Result;
  292. for (const auto &Inc : Iter->second) {
  293. if ((IsAngled && StringRef(Inc.Name).startswith("\"")) ||
  294. (!IsAngled && StringRef(Inc.Name).startswith("<")))
  295. continue;
  296. llvm::Error Err = Result.add(tooling::Replacement(
  297. FileName, Inc.R.getOffset(), Inc.R.getLength(), ""));
  298. if (Err) {
  299. auto ErrMsg = "Unexpected conflicts in #include deletions: " +
  300. llvm::toString(std::move(Err));
  301. llvm_unreachable(ErrMsg.c_str());
  302. }
  303. }
  304. return Result;
  305. }
  306. } // namespace tooling
  307. } // namespace clang