123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550 |
- //===--- InclusionRewriter.cpp - Rewrite includes into their expansions ---===//
- //
- // The LLVM Compiler Infrastructure
- //
- // This file is distributed under the University of Illinois Open Source
- // License. See LICENSE.TXT for details.
- //
- //===----------------------------------------------------------------------===//
- //
- // This code rewrites include invocations into their expansions. This gives you
- // a file with all included files merged into it.
- //
- //===----------------------------------------------------------------------===//
- #include "clang/Rewrite/Frontend/Rewriters.h"
- #include "clang/Basic/SourceManager.h"
- #include "clang/Frontend/PreprocessorOutputOptions.h"
- #include "clang/Lex/HeaderSearch.h"
- #include "clang/Lex/Pragma.h"
- #include "clang/Lex/Preprocessor.h"
- #include "llvm/ADT/SmallString.h"
- #include "llvm/Support/raw_ostream.h"
- using namespace clang;
- using namespace llvm;
- namespace {
- class InclusionRewriter : public PPCallbacks {
- /// Information about which #includes were actually performed,
- /// created by preprocessor callbacks.
- struct FileChange {
- const Module *Mod;
- SourceLocation From;
- FileID Id;
- SrcMgr::CharacteristicKind FileType;
- FileChange(SourceLocation From, const Module *Mod) : Mod(Mod), From(From) {
- }
- };
- Preprocessor &PP; ///< Used to find inclusion directives.
- SourceManager &SM; ///< Used to read and manage source files.
- raw_ostream &OS; ///< The destination stream for rewritten contents.
- const llvm::MemoryBuffer *PredefinesBuffer; ///< The preprocessor predefines.
- bool ShowLineMarkers; ///< Show #line markers.
- bool UseLineDirective; ///< Use of line directives or line markers.
- typedef std::map<unsigned, FileChange> FileChangeMap;
- FileChangeMap FileChanges; ///< Tracks which files were included where.
- /// Used transitively for building up the FileChanges mapping over the
- /// various \c PPCallbacks callbacks.
- FileChangeMap::iterator LastInsertedFileChange;
- public:
- InclusionRewriter(Preprocessor &PP, raw_ostream &OS, bool ShowLineMarkers);
- bool Process(FileID FileId, SrcMgr::CharacteristicKind FileType);
- void setPredefinesBuffer(const llvm::MemoryBuffer *Buf) {
- PredefinesBuffer = Buf;
- }
- private:
- void FileChanged(SourceLocation Loc, FileChangeReason Reason,
- SrcMgr::CharacteristicKind FileType,
- FileID PrevFID) override;
- void FileSkipped(const FileEntry &ParentFile, const Token &FilenameTok,
- SrcMgr::CharacteristicKind FileType) override;
- void InclusionDirective(SourceLocation HashLoc, const Token &IncludeTok,
- StringRef FileName, bool IsAngled,
- CharSourceRange FilenameRange, const FileEntry *File,
- StringRef SearchPath, StringRef RelativePath,
- const Module *Imported) override;
- void WriteLineInfo(const char *Filename, int Line,
- SrcMgr::CharacteristicKind FileType,
- StringRef EOL, StringRef Extra = StringRef());
- void WriteImplicitModuleImport(const Module *Mod, StringRef EOL);
- void OutputContentUpTo(const MemoryBuffer &FromFile,
- unsigned &WriteFrom, unsigned WriteTo,
- StringRef EOL, int &lines,
- bool EnsureNewline);
- void CommentOutDirective(Lexer &DirectivesLex, const Token &StartToken,
- const MemoryBuffer &FromFile, StringRef EOL,
- unsigned &NextToWrite, int &Lines);
- bool HandleHasInclude(FileID FileId, Lexer &RawLex,
- const DirectoryLookup *Lookup, Token &Tok,
- bool &FileExists);
- const FileChange *FindFileChangeLocation(SourceLocation Loc) const;
- StringRef NextIdentifierName(Lexer &RawLex, Token &RawToken);
- };
- } // end anonymous namespace
- /// Initializes an InclusionRewriter with a \p PP source and \p OS destination.
- InclusionRewriter::InclusionRewriter(Preprocessor &PP, raw_ostream &OS,
- bool ShowLineMarkers)
- : PP(PP), SM(PP.getSourceManager()), OS(OS), PredefinesBuffer(nullptr),
- ShowLineMarkers(ShowLineMarkers),
- LastInsertedFileChange(FileChanges.end()) {
- // If we're in microsoft mode, use normal #line instead of line markers.
- UseLineDirective = PP.getLangOpts().MicrosoftExt;
- }
- /// Write appropriate line information as either #line directives or GNU line
- /// markers depending on what mode we're in, including the \p Filename and
- /// \p Line we are located at, using the specified \p EOL line separator, and
- /// any \p Extra context specifiers in GNU line directives.
- void InclusionRewriter::WriteLineInfo(const char *Filename, int Line,
- SrcMgr::CharacteristicKind FileType,
- StringRef EOL, StringRef Extra) {
- if (!ShowLineMarkers)
- return;
- if (UseLineDirective) {
- OS << "#line" << ' ' << Line << ' ' << '"';
- OS.write_escaped(Filename);
- OS << '"';
- } else {
- // Use GNU linemarkers as described here:
- // http://gcc.gnu.org/onlinedocs/cpp/Preprocessor-Output.html
- OS << '#' << ' ' << Line << ' ' << '"';
- OS.write_escaped(Filename);
- OS << '"';
- if (!Extra.empty())
- OS << Extra;
- if (FileType == SrcMgr::C_System)
- // "`3' This indicates that the following text comes from a system header
- // file, so certain warnings should be suppressed."
- OS << " 3";
- else if (FileType == SrcMgr::C_ExternCSystem)
- // as above for `3', plus "`4' This indicates that the following text
- // should be treated as being wrapped in an implicit extern "C" block."
- OS << " 3 4";
- }
- OS << EOL;
- }
- void InclusionRewriter::WriteImplicitModuleImport(const Module *Mod,
- StringRef EOL) {
- OS << "@import " << Mod->getFullModuleName() << ";"
- << " /* clang -frewrite-includes: implicit import */" << EOL;
- }
- /// FileChanged - Whenever the preprocessor enters or exits a #include file
- /// it invokes this handler.
- void InclusionRewriter::FileChanged(SourceLocation Loc,
- FileChangeReason Reason,
- SrcMgr::CharacteristicKind NewFileType,
- FileID) {
- if (Reason != EnterFile)
- return;
- if (LastInsertedFileChange == FileChanges.end())
- // we didn't reach this file (eg: the main file) via an inclusion directive
- return;
- LastInsertedFileChange->second.Id = FullSourceLoc(Loc, SM).getFileID();
- LastInsertedFileChange->second.FileType = NewFileType;
- LastInsertedFileChange = FileChanges.end();
- }
- /// Called whenever an inclusion is skipped due to canonical header protection
- /// macros.
- void InclusionRewriter::FileSkipped(const FileEntry &/*ParentFile*/,
- const Token &/*FilenameTok*/,
- SrcMgr::CharacteristicKind /*FileType*/) {
- assert(LastInsertedFileChange != FileChanges.end() && "A file, that wasn't "
- "found via an inclusion directive, was skipped");
- FileChanges.erase(LastInsertedFileChange);
- LastInsertedFileChange = FileChanges.end();
- }
- /// This should be called whenever the preprocessor encounters include
- /// directives. It does not say whether the file has been included, but it
- /// provides more information about the directive (hash location instead
- /// of location inside the included file). It is assumed that the matching
- /// FileChanged() or FileSkipped() is called after this.
- void InclusionRewriter::InclusionDirective(SourceLocation HashLoc,
- const Token &/*IncludeTok*/,
- StringRef /*FileName*/,
- bool /*IsAngled*/,
- CharSourceRange /*FilenameRange*/,
- const FileEntry * /*File*/,
- StringRef /*SearchPath*/,
- StringRef /*RelativePath*/,
- const Module *Imported) {
- assert(LastInsertedFileChange == FileChanges.end() && "Another inclusion "
- "directive was found before the previous one was processed");
- std::pair<FileChangeMap::iterator, bool> p = FileChanges.insert(
- std::make_pair(HashLoc.getRawEncoding(), FileChange(HashLoc, Imported)));
- assert(p.second && "Unexpected revisitation of the same include directive");
- if (!Imported)
- LastInsertedFileChange = p.first;
- }
- /// Simple lookup for a SourceLocation (specifically one denoting the hash in
- /// an inclusion directive) in the map of inclusion information, FileChanges.
- const InclusionRewriter::FileChange *
- InclusionRewriter::FindFileChangeLocation(SourceLocation Loc) const {
- FileChangeMap::const_iterator I = FileChanges.find(Loc.getRawEncoding());
- if (I != FileChanges.end())
- return &I->second;
- return nullptr;
- }
- /// Detect the likely line ending style of \p FromFile by examining the first
- /// newline found within it.
- static StringRef DetectEOL(const MemoryBuffer &FromFile) {
- // detect what line endings the file uses, so that added content does not mix
- // the style
- const char *Pos = strchr(FromFile.getBufferStart(), '\n');
- if (!Pos)
- return "\n";
- if (Pos + 1 < FromFile.getBufferEnd() && Pos[1] == '\r')
- return "\n\r";
- if (Pos - 1 >= FromFile.getBufferStart() && Pos[-1] == '\r')
- return "\r\n";
- return "\n";
- }
- /// Writes out bytes from \p FromFile, starting at \p NextToWrite and ending at
- /// \p WriteTo - 1.
- void InclusionRewriter::OutputContentUpTo(const MemoryBuffer &FromFile,
- unsigned &WriteFrom, unsigned WriteTo,
- StringRef EOL, int &Line,
- bool EnsureNewline) {
- if (WriteTo <= WriteFrom)
- return;
- if (&FromFile == PredefinesBuffer) {
- // Ignore the #defines of the predefines buffer.
- WriteFrom = WriteTo;
- return;
- }
- OS.write(FromFile.getBufferStart() + WriteFrom, WriteTo - WriteFrom);
- // count lines manually, it's faster than getPresumedLoc()
- Line += std::count(FromFile.getBufferStart() + WriteFrom,
- FromFile.getBufferStart() + WriteTo, '\n');
- if (EnsureNewline) {
- char LastChar = FromFile.getBufferStart()[WriteTo - 1];
- if (LastChar != '\n' && LastChar != '\r')
- OS << EOL;
- }
- WriteFrom = WriteTo;
- }
- /// Print characters from \p FromFile starting at \p NextToWrite up until the
- /// inclusion directive at \p StartToken, then print out the inclusion
- /// inclusion directive disabled by a #if directive, updating \p NextToWrite
- /// and \p Line to track the number of source lines visited and the progress
- /// through the \p FromFile buffer.
- void InclusionRewriter::CommentOutDirective(Lexer &DirectiveLex,
- const Token &StartToken,
- const MemoryBuffer &FromFile,
- StringRef EOL,
- unsigned &NextToWrite, int &Line) {
- OutputContentUpTo(FromFile, NextToWrite,
- SM.getFileOffset(StartToken.getLocation()), EOL, Line, false);
- Token DirectiveToken;
- do {
- DirectiveLex.LexFromRawLexer(DirectiveToken);
- } while (!DirectiveToken.is(tok::eod) && DirectiveToken.isNot(tok::eof));
- if (&FromFile == PredefinesBuffer) {
- // OutputContentUpTo() would not output anything anyway.
- return;
- }
- OS << "#if 0 /* expanded by -frewrite-includes */" << EOL;
- OutputContentUpTo(FromFile, NextToWrite,
- SM.getFileOffset(DirectiveToken.getLocation()) + DirectiveToken.getLength(),
- EOL, Line, true);
- OS << "#endif /* expanded by -frewrite-includes */" << EOL;
- }
- /// Find the next identifier in the pragma directive specified by \p RawToken.
- StringRef InclusionRewriter::NextIdentifierName(Lexer &RawLex,
- Token &RawToken) {
- RawLex.LexFromRawLexer(RawToken);
- if (RawToken.is(tok::raw_identifier))
- PP.LookUpIdentifierInfo(RawToken);
- if (RawToken.is(tok::identifier))
- return RawToken.getIdentifierInfo()->getName();
- return StringRef();
- }
- // Expand __has_include and __has_include_next if possible. If there's no
- // definitive answer return false.
- bool InclusionRewriter::HandleHasInclude(
- FileID FileId, Lexer &RawLex, const DirectoryLookup *Lookup, Token &Tok,
- bool &FileExists) {
- // Lex the opening paren.
- RawLex.LexFromRawLexer(Tok);
- if (Tok.isNot(tok::l_paren))
- return false;
- RawLex.LexFromRawLexer(Tok);
- SmallString<128> FilenameBuffer;
- StringRef Filename;
- // Since the raw lexer doesn't give us angle_literals we have to parse them
- // ourselves.
- // FIXME: What to do if the file name is a macro?
- if (Tok.is(tok::less)) {
- RawLex.LexFromRawLexer(Tok);
- FilenameBuffer += '<';
- do {
- if (Tok.is(tok::eod)) // Sanity check.
- return false;
- if (Tok.is(tok::raw_identifier))
- PP.LookUpIdentifierInfo(Tok);
- // Get the string piece.
- SmallVector<char, 128> TmpBuffer;
- bool Invalid = false;
- StringRef TmpName = PP.getSpelling(Tok, TmpBuffer, &Invalid);
- if (Invalid)
- return false;
- FilenameBuffer += TmpName;
- RawLex.LexFromRawLexer(Tok);
- } while (Tok.isNot(tok::greater));
- FilenameBuffer += '>';
- Filename = FilenameBuffer;
- } else {
- if (Tok.isNot(tok::string_literal))
- return false;
- bool Invalid = false;
- Filename = PP.getSpelling(Tok, FilenameBuffer, &Invalid);
- if (Invalid)
- return false;
- }
- // Lex the closing paren.
- RawLex.LexFromRawLexer(Tok);
- if (Tok.isNot(tok::r_paren))
- return false;
- // Now ask HeaderInfo if it knows about the header.
- // FIXME: Subframeworks aren't handled here. Do we care?
- bool isAngled = PP.GetIncludeFilenameSpelling(Tok.getLocation(), Filename);
- const DirectoryLookup *CurDir;
- const FileEntry *FileEnt = PP.getSourceManager().getFileEntryForID(FileId);
- SmallVector<std::pair<const FileEntry *, const DirectoryEntry *>, 1>
- Includers;
- Includers.push_back(std::make_pair(FileEnt, FileEnt->getDir()));
- const FileEntry *File = PP.getHeaderSearchInfo().LookupFile(
- Filename, SourceLocation(), isAngled, nullptr, CurDir, Includers, nullptr,
- nullptr, nullptr, false);
- FileExists = File != nullptr;
- return true;
- }
- /// Use a raw lexer to analyze \p FileId, incrementally copying parts of it
- /// and including content of included files recursively.
- bool InclusionRewriter::Process(FileID FileId,
- SrcMgr::CharacteristicKind FileType)
- {
- bool Invalid;
- const MemoryBuffer &FromFile = *SM.getBuffer(FileId, &Invalid);
- if (Invalid) // invalid inclusion
- return false;
- const char *FileName = FromFile.getBufferIdentifier();
- Lexer RawLex(FileId, &FromFile, PP.getSourceManager(), PP.getLangOpts());
- RawLex.SetCommentRetentionState(false);
- StringRef EOL = DetectEOL(FromFile);
- // Per the GNU docs: "1" indicates entering a new file.
- if (FileId == SM.getMainFileID() || FileId == PP.getPredefinesFileID())
- WriteLineInfo(FileName, 1, FileType, EOL, "");
- else
- WriteLineInfo(FileName, 1, FileType, EOL, " 1");
- if (SM.getFileIDSize(FileId) == 0)
- return false;
- // The next byte to be copied from the source file, which may be non-zero if
- // the lexer handled a BOM.
- unsigned NextToWrite = SM.getFileOffset(RawLex.getSourceLocation());
- assert(SM.getLineNumber(FileId, NextToWrite) == 1);
- int Line = 1; // The current input file line number.
- Token RawToken;
- RawLex.LexFromRawLexer(RawToken);
- // TODO: Consider adding a switch that strips possibly unimportant content,
- // such as comments, to reduce the size of repro files.
- while (RawToken.isNot(tok::eof)) {
- if (RawToken.is(tok::hash) && RawToken.isAtStartOfLine()) {
- RawLex.setParsingPreprocessorDirective(true);
- Token HashToken = RawToken;
- RawLex.LexFromRawLexer(RawToken);
- if (RawToken.is(tok::raw_identifier))
- PP.LookUpIdentifierInfo(RawToken);
- if (RawToken.getIdentifierInfo() != nullptr) {
- switch (RawToken.getIdentifierInfo()->getPPKeywordID()) {
- case tok::pp_include:
- case tok::pp_include_next:
- case tok::pp_import: {
- CommentOutDirective(RawLex, HashToken, FromFile, EOL, NextToWrite,
- Line);
- if (FileId != PP.getPredefinesFileID())
- WriteLineInfo(FileName, Line - 1, FileType, EOL, "");
- StringRef LineInfoExtra;
- if (const FileChange *Change = FindFileChangeLocation(
- HashToken.getLocation())) {
- if (Change->Mod) {
- WriteImplicitModuleImport(Change->Mod, EOL);
- // else now include and recursively process the file
- } else if (Process(Change->Id, Change->FileType)) {
- // and set lineinfo back to this file, if the nested one was
- // actually included
- // `2' indicates returning to a file (after having included
- // another file.
- LineInfoExtra = " 2";
- }
- }
- // fix up lineinfo (since commented out directive changed line
- // numbers) for inclusions that were skipped due to header guards
- WriteLineInfo(FileName, Line, FileType, EOL, LineInfoExtra);
- break;
- }
- case tok::pp_pragma: {
- StringRef Identifier = NextIdentifierName(RawLex, RawToken);
- if (Identifier == "clang" || Identifier == "GCC") {
- if (NextIdentifierName(RawLex, RawToken) == "system_header") {
- // keep the directive in, commented out
- CommentOutDirective(RawLex, HashToken, FromFile, EOL,
- NextToWrite, Line);
- // update our own type
- FileType = SM.getFileCharacteristic(RawToken.getLocation());
- WriteLineInfo(FileName, Line, FileType, EOL);
- }
- } else if (Identifier == "once") {
- // keep the directive in, commented out
- CommentOutDirective(RawLex, HashToken, FromFile, EOL,
- NextToWrite, Line);
- WriteLineInfo(FileName, Line, FileType, EOL);
- }
- break;
- }
- case tok::pp_if:
- case tok::pp_elif: {
- bool elif = (RawToken.getIdentifierInfo()->getPPKeywordID() ==
- tok::pp_elif);
- // Rewrite special builtin macros to avoid pulling in host details.
- do {
- // Walk over the directive.
- RawLex.LexFromRawLexer(RawToken);
- if (RawToken.is(tok::raw_identifier))
- PP.LookUpIdentifierInfo(RawToken);
- if (RawToken.is(tok::identifier)) {
- bool HasFile;
- SourceLocation Loc = RawToken.getLocation();
- // Rewrite __has_include(x)
- if (RawToken.getIdentifierInfo()->isStr("__has_include")) {
- if (!HandleHasInclude(FileId, RawLex, nullptr, RawToken,
- HasFile))
- continue;
- // Rewrite __has_include_next(x)
- } else if (RawToken.getIdentifierInfo()->isStr(
- "__has_include_next")) {
- const DirectoryLookup *Lookup = PP.GetCurDirLookup();
- if (Lookup)
- ++Lookup;
- if (!HandleHasInclude(FileId, RawLex, Lookup, RawToken,
- HasFile))
- continue;
- } else {
- continue;
- }
- // Replace the macro with (0) or (1), followed by the commented
- // out macro for reference.
- OutputContentUpTo(FromFile, NextToWrite, SM.getFileOffset(Loc),
- EOL, Line, false);
- OS << '(' << (int) HasFile << ")/*";
- OutputContentUpTo(FromFile, NextToWrite,
- SM.getFileOffset(RawToken.getLocation()) +
- RawToken.getLength(),
- EOL, Line, false);
- OS << "*/";
- }
- } while (RawToken.isNot(tok::eod));
- if (elif) {
- OutputContentUpTo(FromFile, NextToWrite,
- SM.getFileOffset(RawToken.getLocation()) +
- RawToken.getLength(),
- EOL, Line, /*EnsureNewLine*/ true);
- WriteLineInfo(FileName, Line, FileType, EOL);
- }
- break;
- }
- case tok::pp_endif:
- case tok::pp_else: {
- // We surround every #include by #if 0 to comment it out, but that
- // changes line numbers. These are fixed up right after that, but
- // the whole #include could be inside a preprocessor conditional
- // that is not processed. So it is necessary to fix the line
- // numbers one the next line after each #else/#endif as well.
- RawLex.SetKeepWhitespaceMode(true);
- do {
- RawLex.LexFromRawLexer(RawToken);
- } while (RawToken.isNot(tok::eod) && RawToken.isNot(tok::eof));
- OutputContentUpTo(
- FromFile, NextToWrite,
- SM.getFileOffset(RawToken.getLocation()) + RawToken.getLength(),
- EOL, Line, /*EnsureNewLine*/ true);
- WriteLineInfo(FileName, Line, FileType, EOL);
- RawLex.SetKeepWhitespaceMode(false);
- }
- default:
- break;
- }
- }
- RawLex.setParsingPreprocessorDirective(false);
- }
- RawLex.LexFromRawLexer(RawToken);
- }
- OutputContentUpTo(FromFile, NextToWrite,
- SM.getFileOffset(SM.getLocForEndOfFile(FileId)), EOL, Line,
- /*EnsureNewline*/true);
- return true;
- }
- /// InclusionRewriterInInput - Implement -frewrite-includes mode.
- void clang::RewriteIncludesInInput(Preprocessor &PP, raw_ostream *OS,
- const PreprocessorOutputOptions &Opts) {
- SourceManager &SM = PP.getSourceManager();
- InclusionRewriter *Rewrite = new InclusionRewriter(PP, *OS,
- Opts.ShowLineMarkers);
- PP.addPPCallbacks(Rewrite);
- PP.IgnorePragmas();
- // First let the preprocessor process the entire file and call callbacks.
- // Callbacks will record which #include's were actually performed.
- PP.EnterMainSourceFile();
- Token Tok;
- // Only preprocessor directives matter here, so disable macro expansion
- // everywhere else as an optimization.
- // TODO: It would be even faster if the preprocessor could be switched
- // to a mode where it would parse only preprocessor directives and comments,
- // nothing else matters for parsing or processing.
- PP.SetMacroExpansionOnlyInDirectives();
- do {
- PP.Lex(Tok);
- } while (Tok.isNot(tok::eof));
- Rewrite->setPredefinesBuffer(SM.getBuffer(PP.getPredefinesFileID()));
- Rewrite->Process(PP.getPredefinesFileID(), SrcMgr::C_User);
- Rewrite->Process(SM.getMainFileID(), SrcMgr::C_User);
- OS->flush();
- }
|