InclusionRewriter.cpp 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361
  1. //===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This code rewrites include invocations into their expansions. This gives you
  11. // a file with all included files merged into it.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "clang/Rewrite/Frontend/Rewriters.h"
  15. #include "clang/Lex/Preprocessor.h"
  16. #include "clang/Basic/SourceManager.h"
  17. #include "clang/Frontend/PreprocessorOutputOptions.h"
  18. #include "llvm/Support/raw_ostream.h"
  19. using namespace clang;
  20. using namespace llvm;
  21. namespace {
  22. class InclusionRewriter : public PPCallbacks {
  23. /// Information about which #includes were actually performed,
  24. /// created by preprocessor callbacks.
  25. struct FileChange {
  26. SourceLocation From;
  27. FileID Id;
  28. SrcMgr::CharacteristicKind FileType;
  29. FileChange(SourceLocation From) : From(From) {
  30. }
  31. };
  32. Preprocessor &PP; ///< Used to find inclusion directives.
  33. SourceManager &SM; ///< Used to read and manage source files.
  34. raw_ostream &OS; ///< The destination stream for rewritten contents.
  35. bool ShowLineMarkers; ///< Show #line markers.
  36. bool UseLineDirective; ///< Use of line directives or line markers.
  37. typedef std::map<unsigned, FileChange> FileChangeMap;
  38. FileChangeMap FileChanges; /// Tracks which files were included where.
  39. /// Used transitively for building up the FileChanges mapping over the
  40. /// various \c PPCallbacks callbacks.
  41. FileChangeMap::iterator LastInsertedFileChange;
  42. public:
  43. InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers);
  44. bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType);
  45. private:
  46. virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
  47. SrcMgr::CharacteristicKind FileType,
  48. FileID PrevFID);
  49. virtual void FileSkipped(const FileEntry &ParentFile,
  50. const Token &FilenameTok,
  51. SrcMgr::CharacteristicKind FileType);
  52. virtual void InclusionDirective(SourceLocation HashLoc,
  53. const Token &IncludeTok,
  54. StringRef FileName,
  55. bool IsAngled,
  56. CharSourceRange FilenameRange,
  57. const FileEntry *File,
  58. StringRef SearchPath,
  59. StringRef RelativePath);
  60. void WriteLineInfo(const char *Filename, int Line,
  61. SrcMgr::CharacteristicKind FileType,
  62. StringRef EOL, StringRef Extra = StringRef());
  63. void OutputContentUpTo(const MemoryBuffer &FromFile,
  64. unsigned &WriteFrom, unsigned WriteTo,
  65. StringRef EOL, int &lines,
  66. bool EnsureNewline = false);
  67. void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,
  68. const MemoryBuffer &FromFile, StringRef EOL,
  69. unsigned &NextToWrite, int &Lines);
  70. const FileChange *FindFileChangeLocation(SourceLocation Loc) const;
  71. StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
  72. };
  73. } // end anonymous namespace
  74. /// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
  75. InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
  76. bool ShowLineMarkers)
  77. : PP(PP), SM(PP.getSourceManager()), OS(OS),
  78. ShowLineMarkers(ShowLineMarkers),
  79. LastInsertedFileChange(FileChanges.end()) {
  80. // If we're in microsoft mode, use normal #line instead of line markers.
  81. UseLineDirective = PP.getLangOpts().MicrosoftExt;
  82. }
  83. /// Write appropriate line information as either #line directives or GNU line
  84. /// markers depending on what mode we're in, including the \p Filename and
  85. /// \p Line we are located at, using the specified \p EOL line separator, and
  86. /// any \p Extra context specifiers in GNU line directives.
  87. void InclusionRewriter::WriteLineInfo(const char *Filename, int Line,
  88. SrcMgr::CharacteristicKind FileType,
  89. StringRef EOL, StringRef Extra) {
  90. if (!ShowLineMarkers)
  91. return;
  92. if (UseLineDirective) {
  93. OS << "#line" << ' ' << Line << ' ' << '"' << Filename << '"';
  94. } else {
  95. // Use GNU linemarkers as described here:
  96. // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
  97. OS << '#' << ' ' << Line << ' ' << '"' << Filename << '"';
  98. if (!Extra.empty())
  99. OS << Extra;
  100. if (FileType == SrcMgr::C_System)
  101. // "`3' This indicates that the following text comes from a system header
  102. // file, so certain warnings should be suppressed."
  103. OS << " 3";
  104. else if (FileType == SrcMgr::C_ExternCSystem)
  105. // as above for `3', plus "`4' This indicates that the following text
  106. // should be treated as being wrapped in an implicit extern "C" block."
  107. OS << " 3 4";
  108. }
  109. OS << EOL;
  110. }
  111. /// FileChanged - Whenever the preprocessor enters or exits a #include file
  112. /// it invokes this handler.
  113. void InclusionRewriter::FileChanged(SourceLocation Loc,
  114. FileChangeReason Reason,
  115. SrcMgr::CharacteristicKind NewFileType,
  116. FileID) {
  117. if (Reason != EnterFile)
  118. return;
  119. if (LastInsertedFileChange == FileChanges.end())
  120. // we didn't reach this file (eg: the main file) via an inclusion directive
  121. return;
  122. LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID();
  123. LastInsertedFileChange->second.FileType = NewFileType;
  124. LastInsertedFileChange = FileChanges.end();
  125. }
  126. /// Called whenever an inclusion is skipped due to canonical header protection
  127. /// macros.
  128. void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/,
  129. const Token &/*FilenameTok*/,
  130. SrcMgr::CharacteristicKind /*FileType*/) {
  131. assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't "
  132. "found via an inclusion directive, was skipped");
  133. FileChanges.erase(LastInsertedFileChange);
  134. LastInsertedFileChange = FileChanges.end();
  135. }
  136. /// This should be called whenever the preprocessor encounters include
  137. /// directives. It does not say whether the file has been included, but it
  138. /// provides more information about the directive (hash location instead
  139. /// of location inside the included file). It is assumed that the matching
  140. /// FileChanged() or FileSkipped() is called after this.
  141. void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
  142. const Token &/*IncludeTok*/,
  143. StringRef /*FileName*/,
  144. bool /*IsAngled*/,
  145. CharSourceRange /*FilenameRange*/,
  146. const FileEntry * /*File*/,
  147. StringRef /*SearchPath*/,
  148. StringRef /*RelativePath*/) {
  149. assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion "
  150. "directive was found before the previous one was processed");
  151. std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert(
  152. std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc)));
  153. assert(p.second && "Unexpected revisitation of the same include directive");
  154. LastInsertedFileChange = p.first;
  155. }
  156. /// Simple lookup for a SourceLocation (specifically one denoting the hash in
  157. /// an inclusion directive) in the map of inclusion information, FileChanges.
  158. const InclusionRewriter::FileChange *
  159. InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const {
  160. FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding());
  161. if (I != FileChanges.end())
  162. return &I->second;
  163. return NULL;
  164. }
  165. /// Detect the likely line ending style of \p FromFile by examining the first
  166. /// newline found within it.
  167. static StringRef DetectEOL(const MemoryBuffer &FromFile) {
  168. // detect what line endings the file uses, so that added content does not mix
  169. // the style
  170. const char *Pos = strchr(FromFile.getBufferStart(), '\n');
  171. if (Pos == NULL)
  172. return "\n";
  173. if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
  174. return "\n\r";
  175. if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
  176. return "\r\n";
  177. return "\n";
  178. }
  179. /// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at
  180. /// \p WriteTo - 1.
  181. void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile,
  182. unsigned &WriteFrom, unsigned WriteTo,
  183. StringRef EOL, int &Line,
  184. bool EnsureNewline) {
  185. if (WriteTo <= WriteFrom)
  186. return;
  187. OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom);
  188. // count lines manually, it's faster than getPresumedLoc()
  189. Line += std::count(FromFile.getBufferStart() + WriteFrom,
  190. FromFile.getBufferStart() + WriteTo, '\n');
  191. if (EnsureNewline) {
  192. char LastChar = FromFile.getBufferStart()[WriteTo - 1];
  193. if (LastChar != '\n' && LastChar != '\r')
  194. OS << EOL;
  195. }
  196. WriteFrom = WriteTo;
  197. }
  198. /// Print characters from \p FromFile starting at \p NextToWrite up until the
  199. /// inclusion directive at \p StartToken, then print out the inclusion
  200. /// inclusion directive disabled by a #if directive, updating \p NextToWrite
  201. /// and \p Line to track the number of source lines visited and the progress
  202. /// through the \p FromFile buffer.
  203. void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
  204. const Token &StartToken,
  205. const MemoryBuffer &FromFile,
  206. StringRef EOL,
  207. unsigned &NextToWrite, int &Line) {
  208. OutputContentUpTo(FromFile, NextToWrite,
  209. SM.getFileOffset(StartToken.getLocation()), EOL, Line);
  210. Token DirectiveToken;
  211. do {
  212. DirectiveLex.LexFromRawLexer(DirectiveToken);
  213. } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));
  214. OS << "#if 0 /* expanded by -frewrite-includes */" << EOL;
  215. OutputContentUpTo(FromFile, NextToWrite,
  216. SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(),
  217. EOL, Line);
  218. OS << "#endif /* expanded by -frewrite-includes */" << EOL;
  219. }
  220. /// Find the next identifier in the pragma directive specified by \p RawToken.
  221. StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,
  222. Token &RawToken) {
  223. RawLex.LexFromRawLexer(RawToken);
  224. if (RawToken.is(tok::raw_identifier))
  225. PP.LookUpIdentifierInfo(RawToken);
  226. if (RawToken.is(tok::identifier))
  227. return RawToken.getIdentifierInfo()->getName();
  228. return StringRef();
  229. }
  230. /// Use a raw lexer to analyze \p FileId, inccrementally copying parts of it
  231. /// and including content of included files recursively.
  232. bool InclusionRewriter::Process(FileID FileId,
  233. SrcMgr::CharacteristicKind FileType)
  234. {
  235. bool Invalid;
  236. const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid);
  237. if (Invalid) // invalid inclusion
  238. return true;
  239. const char *FileName = FromFile.getBufferIdentifier();
  240. Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts());
  241. RawLex.SetCommentRetentionState(false);
  242. StringRef EOL = DetectEOL(FromFile);
  243. // Per the GNU docs: "1" indicates the start of a new file.
  244. WriteLineInfo(FileName, 1, FileType, EOL, " 1");
  245. if (SM.getFileIDSize(FileId) == 0)
  246. return true;
  247. // The next byte to be copied from the source file
  248. unsigned NextToWrite = 0;
  249. int Line = 1; // The current input file line number.
  250. Token RawToken;
  251. RawLex.LexFromRawLexer(RawToken);
  252. // TODO: Consider adding a switch that strips possibly unimportant content,
  253. // such as comments, to reduce the size of repro files.
  254. while (RawToken.isNot(tok::eof)) {
  255. if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {
  256. RawLex.setParsingPreprocessorDirective(true);
  257. Token HashToken = RawToken;
  258. RawLex.LexFromRawLexer(RawToken);
  259. if (RawToken.is(tok::raw_identifier))
  260. PP.LookUpIdentifierInfo(RawToken);
  261. if (RawToken.is(tok::identifier)) {
  262. switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {
  263. case tok::pp_include:
  264. case tok::pp_include_next:
  265. case tok::pp_import: {
  266. CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite,
  267. Line);
  268. if (const FileChange *Change = FindFileChangeLocation(
  269. HashToken.getLocation())) {
  270. // now include and recursively process the file
  271. if (Process(Change->Id, Change->FileType))
  272. // and set lineinfo back to this file, if the nested one was
  273. // actually included
  274. // `2' indicates returning to a file (after having included
  275. // another file.
  276. WriteLineInfo(FileName, Line, FileType, EOL, " 2");
  277. } else
  278. // fix up lineinfo (since commented out directive changed line
  279. // numbers) for inclusions that were skipped due to header guards
  280. WriteLineInfo(FileName, Line, FileType, EOL);
  281. break;
  282. }
  283. case tok::pp_pragma: {
  284. StringRef Identifier = NextIdentifierName(RawLex, RawToken);
  285. if (Identifier == "clang" || Identifier == "GCC") {
  286. if (NextIdentifierName(RawLex, RawToken) == "system_header") {
  287. // keep the directive in, commented out
  288. CommentOutDirective(RawLex, HashToken, FromFile, EOL,
  289. NextToWrite, Line);
  290. // update our own type
  291. FileType = SM.getFileCharacteristic(RawToken.getLocation());
  292. WriteLineInfo(FileName, Line, FileType, EOL);
  293. }
  294. } else if (Identifier == "once") {
  295. // keep the directive in, commented out
  296. CommentOutDirective(RawLex, HashToken, FromFile, EOL,
  297. NextToWrite, Line);
  298. WriteLineInfo(FileName, Line, FileType, EOL);
  299. }
  300. break;
  301. }
  302. default:
  303. break;
  304. }
  305. }
  306. RawLex.setParsingPreprocessorDirective(false);
  307. }
  308. RawLex.LexFromRawLexer(RawToken);
  309. }
  310. OutputContentUpTo(FromFile, NextToWrite,
  311. SM.getFileOffset(SM.getLocForEndOfFile(FileId)) + 1, EOL, Line,
  312. /*EnsureNewline*/true);
  313. return true;
  314. }
  315. /// InclusionRewriterInInput - Implement -frewrite-includes mode.
  316. void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
  317. const PreprocessorOutputOptions &Opts) {
  318. SourceManager &SM = PP.getSourceManager();
  319. InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS,
  320. Opts.ShowLineMarkers);
  321. PP.addPPCallbacks(Rewrite);
  322. // First let the preprocessor process the entire file and call callbacks.
  323. // Callbacks will record which #include's were actually performed.
  324. PP.EnterMainSourceFile();
  325. Token Tok;
  326. // Only preprocessor directives matter here, so disable macro expansion
  327. // everywhere else as an optimization.
  328. // TODO: It would be even faster if the preprocessor could be switched
  329. // to a mode where it would parse only preprocessor directives and comments,
  330. // nothing else matters for parsing or processing.
  331. PP.SetMacroExpansionOnlyInDirectives();
  332. do {
  333. PP.Lex(Tok);
  334. } while (Tok.isNot(tok::eof));
  335. Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User);
  336. OS->flush();
  337. }