PrintPreprocessedOutput.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520
  1. //===--- PrintPreprocessedOutput.cpp - Implement the -E mode --------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This code simply runs the preprocessor on the input file and prints out the
  11. // result. This is the traditional behavior of the -E option.
  12. //
  13. //===----------------------------------------------------------------------===//
  14. #include "clang/Frontend/Utils.h"
  15. #include "clang/Basic/Diagnostic.h"
  16. #include "clang/Basic/SourceManager.h"
  17. #include "clang/Frontend/PreprocessorOutputOptions.h"
  18. #include "clang/Lex/MacroInfo.h"
  19. #include "clang/Lex/PPCallbacks.h"
  20. #include "clang/Lex/Pragma.h"
  21. #include "clang/Lex/Preprocessor.h"
  22. #include "clang/Lex/TokenConcatenation.h"
  23. #include "llvm/ADT/SmallString.h"
  24. #include "llvm/ADT/STLExtras.h"
  25. #include "llvm/Config/config.h"
  26. #include "llvm/Support/raw_ostream.h"
  27. #include <cstdio>
  28. using namespace clang;
  29. /// PrintMacroDefinition - Print a macro definition in a form that will be
  30. /// properly accepted back as a definition.
  31. static void PrintMacroDefinition(const IdentifierInfo &II, const MacroInfo &MI,
  32. Preprocessor &PP, llvm::raw_ostream &OS) {
  33. OS << "#define " << II.getName();
  34. if (MI.isFunctionLike()) {
  35. OS << '(';
  36. if (!MI.arg_empty()) {
  37. MacroInfo::arg_iterator AI = MI.arg_begin(), E = MI.arg_end();
  38. for (; AI+1 != E; ++AI) {
  39. OS << (*AI)->getName();
  40. OS << ',';
  41. }
  42. // Last argument.
  43. if ((*AI)->getName() == "__VA_ARGS__")
  44. OS << "...";
  45. else
  46. OS << (*AI)->getName();
  47. }
  48. if (MI.isGNUVarargs())
  49. OS << "..."; // #define foo(x...)
  50. OS << ')';
  51. }
  52. // GCC always emits a space, even if the macro body is empty. However, do not
  53. // want to emit two spaces if the first token has a leading space.
  54. if (MI.tokens_empty() || !MI.tokens_begin()->hasLeadingSpace())
  55. OS << ' ';
  56. llvm::SmallString<128> SpellingBuffer;
  57. for (MacroInfo::tokens_iterator I = MI.tokens_begin(), E = MI.tokens_end();
  58. I != E; ++I) {
  59. if (I->hasLeadingSpace())
  60. OS << ' ';
  61. OS << PP.getSpelling(*I, SpellingBuffer);
  62. }
  63. }
  64. //===----------------------------------------------------------------------===//
  65. // Preprocessed token printer
  66. //===----------------------------------------------------------------------===//
  67. namespace {
  68. class PrintPPOutputPPCallbacks : public PPCallbacks {
  69. Preprocessor &PP;
  70. SourceManager &SM;
  71. TokenConcatenation ConcatInfo;
  72. public:
  73. llvm::raw_ostream &OS;
  74. private:
  75. unsigned CurLine;
  76. bool EmittedTokensOnThisLine;
  77. bool EmittedMacroOnThisLine;
  78. SrcMgr::CharacteristicKind FileType;
  79. llvm::SmallString<512> CurFilename;
  80. bool Initialized;
  81. bool DisableLineMarkers;
  82. bool DumpDefines;
  83. bool UseLineDirective;
  84. public:
  85. PrintPPOutputPPCallbacks(Preprocessor &pp, llvm::raw_ostream &os,
  86. bool lineMarkers, bool defines)
  87. : PP(pp), SM(PP.getSourceManager()),
  88. ConcatInfo(PP), OS(os), DisableLineMarkers(lineMarkers),
  89. DumpDefines(defines) {
  90. CurLine = 0;
  91. CurFilename += "<uninit>";
  92. EmittedTokensOnThisLine = false;
  93. EmittedMacroOnThisLine = false;
  94. FileType = SrcMgr::C_User;
  95. Initialized = false;
  96. // If we're in microsoft mode, use normal #line instead of line markers.
  97. UseLineDirective = PP.getLangOptions().Microsoft;
  98. }
  99. void SetEmittedTokensOnThisLine() { EmittedTokensOnThisLine = true; }
  100. bool hasEmittedTokensOnThisLine() const { return EmittedTokensOnThisLine; }
  101. virtual void FileChanged(SourceLocation Loc, FileChangeReason Reason,
  102. SrcMgr::CharacteristicKind FileType);
  103. virtual void Ident(SourceLocation Loc, const std::string &str);
  104. virtual void PragmaComment(SourceLocation Loc, const IdentifierInfo *Kind,
  105. const std::string &Str);
  106. bool HandleFirstTokOnLine(Token &Tok);
  107. bool MoveToLine(SourceLocation Loc) {
  108. return MoveToLine(SM.getPresumedLoc(Loc).getLine());
  109. }
  110. bool MoveToLine(unsigned LineNo);
  111. bool AvoidConcat(const Token &PrevPrevTok, const Token &PrevTok,
  112. const Token &Tok) {
  113. return ConcatInfo.AvoidConcat(PrevPrevTok, PrevTok, Tok);
  114. }
  115. void WriteLineInfo(unsigned LineNo, const char *Extra=0, unsigned ExtraLen=0);
  116. void HandleNewlinesInToken(const char *TokStr, unsigned Len);
  117. /// MacroDefined - This hook is called whenever a macro definition is seen.
  118. void MacroDefined(const IdentifierInfo *II, const MacroInfo *MI);
  119. };
  120. } // end anonymous namespace
  121. void PrintPPOutputPPCallbacks::WriteLineInfo(unsigned LineNo,
  122. const char *Extra,
  123. unsigned ExtraLen) {
  124. if (EmittedTokensOnThisLine || EmittedMacroOnThisLine) {
  125. OS << '\n';
  126. EmittedTokensOnThisLine = false;
  127. EmittedMacroOnThisLine = false;
  128. }
  129. // Emit #line directives or GNU line markers depending on what mode we're in.
  130. if (UseLineDirective) {
  131. OS << "#line" << ' ' << LineNo << ' ' << '"';
  132. OS.write(&CurFilename[0], CurFilename.size());
  133. OS << '"';
  134. } else {
  135. OS << '#' << ' ' << LineNo << ' ' << '"';
  136. OS.write(&CurFilename[0], CurFilename.size());
  137. OS << '"';
  138. if (ExtraLen)
  139. OS.write(Extra, ExtraLen);
  140. if (FileType == SrcMgr::C_System)
  141. OS.write(" 3", 2);
  142. else if (FileType == SrcMgr::C_ExternCSystem)
  143. OS.write(" 3 4", 4);
  144. }
  145. OS << '\n';
  146. }
  147. /// MoveToLine - Move the output to the source line specified by the location
  148. /// object. We can do this by emitting some number of \n's, or be emitting a
  149. /// #line directive. This returns false if already at the specified line, true
  150. /// if some newlines were emitted.
  151. bool PrintPPOutputPPCallbacks::MoveToLine(unsigned LineNo) {
  152. // If this line is "close enough" to the original line, just print newlines,
  153. // otherwise print a #line directive.
  154. if (LineNo-CurLine <= 8) {
  155. if (LineNo-CurLine == 1)
  156. OS << '\n';
  157. else if (LineNo == CurLine)
  158. return false; // Spelling line moved, but instantiation line didn't.
  159. else {
  160. const char *NewLines = "\n\n\n\n\n\n\n\n";
  161. OS.write(NewLines, LineNo-CurLine);
  162. }
  163. } else if (!DisableLineMarkers) {
  164. // Emit a #line or line marker.
  165. WriteLineInfo(LineNo, 0, 0);
  166. } else {
  167. // Okay, we're in -P mode, which turns off line markers. However, we still
  168. // need to emit a newline between tokens on different lines.
  169. if (EmittedTokensOnThisLine || EmittedMacroOnThisLine) {
  170. OS << '\n';
  171. EmittedTokensOnThisLine = false;
  172. EmittedMacroOnThisLine = false;
  173. }
  174. }
  175. CurLine = LineNo;
  176. return true;
  177. }
  178. /// FileChanged - Whenever the preprocessor enters or exits a #include file
  179. /// it invokes this handler. Update our conception of the current source
  180. /// position.
  181. void PrintPPOutputPPCallbacks::FileChanged(SourceLocation Loc,
  182. FileChangeReason Reason,
  183. SrcMgr::CharacteristicKind NewFileType) {
  184. // Unless we are exiting a #include, make sure to skip ahead to the line the
  185. // #include directive was at.
  186. SourceManager &SourceMgr = SM;
  187. PresumedLoc UserLoc = SourceMgr.getPresumedLoc(Loc);
  188. unsigned NewLine = UserLoc.getLine();
  189. if (Reason == PPCallbacks::EnterFile) {
  190. SourceLocation IncludeLoc = SourceMgr.getPresumedLoc(Loc).getIncludeLoc();
  191. if (IncludeLoc.isValid())
  192. MoveToLine(IncludeLoc);
  193. } else if (Reason == PPCallbacks::SystemHeaderPragma) {
  194. MoveToLine(NewLine);
  195. // TODO GCC emits the # directive for this directive on the line AFTER the
  196. // directive and emits a bunch of spaces that aren't needed. Emulate this
  197. // strange behavior.
  198. }
  199. CurLine = NewLine;
  200. if (DisableLineMarkers) return;
  201. CurFilename.clear();
  202. CurFilename += UserLoc.getFilename();
  203. Lexer::Stringify(CurFilename);
  204. FileType = NewFileType;
  205. if (!Initialized) {
  206. WriteLineInfo(CurLine);
  207. Initialized = true;
  208. }
  209. switch (Reason) {
  210. case PPCallbacks::EnterFile:
  211. WriteLineInfo(CurLine, " 1", 2);
  212. break;
  213. case PPCallbacks::ExitFile:
  214. WriteLineInfo(CurLine, " 2", 2);
  215. break;
  216. case PPCallbacks::SystemHeaderPragma:
  217. case PPCallbacks::RenameFile:
  218. WriteLineInfo(CurLine);
  219. break;
  220. }
  221. }
  222. /// Ident - Handle #ident directives when read by the preprocessor.
  223. ///
  224. void PrintPPOutputPPCallbacks::Ident(SourceLocation Loc, const std::string &S) {
  225. MoveToLine(Loc);
  226. OS.write("#ident ", strlen("#ident "));
  227. OS.write(&S[0], S.size());
  228. EmittedTokensOnThisLine = true;
  229. }
  230. /// MacroDefined - This hook is called whenever a macro definition is seen.
  231. void PrintPPOutputPPCallbacks::MacroDefined(const IdentifierInfo *II,
  232. const MacroInfo *MI) {
  233. // Only print out macro definitions in -dD mode.
  234. if (!DumpDefines ||
  235. // Ignore __FILE__ etc.
  236. MI->isBuiltinMacro()) return;
  237. MoveToLine(MI->getDefinitionLoc());
  238. PrintMacroDefinition(*II, *MI, PP, OS);
  239. EmittedMacroOnThisLine = true;
  240. }
  241. void PrintPPOutputPPCallbacks::PragmaComment(SourceLocation Loc,
  242. const IdentifierInfo *Kind,
  243. const std::string &Str) {
  244. MoveToLine(Loc);
  245. OS << "#pragma comment(" << Kind->getName();
  246. if (!Str.empty()) {
  247. OS << ", \"";
  248. for (unsigned i = 0, e = Str.size(); i != e; ++i) {
  249. unsigned char Char = Str[i];
  250. if (isprint(Char) && Char != '\\' && Char != '"')
  251. OS << (char)Char;
  252. else // Output anything hard as an octal escape.
  253. OS << '\\'
  254. << (char)('0'+ ((Char >> 6) & 7))
  255. << (char)('0'+ ((Char >> 3) & 7))
  256. << (char)('0'+ ((Char >> 0) & 7));
  257. }
  258. OS << '"';
  259. }
  260. OS << ')';
  261. EmittedTokensOnThisLine = true;
  262. }
  263. /// HandleFirstTokOnLine - When emitting a preprocessed file in -E mode, this
  264. /// is called for the first token on each new line. If this really is the start
  265. /// of a new logical line, handle it and return true, otherwise return false.
  266. /// This may not be the start of a logical line because the "start of line"
  267. /// marker is set for spelling lines, not instantiation ones.
  268. bool PrintPPOutputPPCallbacks::HandleFirstTokOnLine(Token &Tok) {
  269. // Figure out what line we went to and insert the appropriate number of
  270. // newline characters.
  271. if (!MoveToLine(Tok.getLocation()))
  272. return false;
  273. // Print out space characters so that the first token on a line is
  274. // indented for easy reading.
  275. unsigned ColNo = SM.getInstantiationColumnNumber(Tok.getLocation());
  276. // This hack prevents stuff like:
  277. // #define HASH #
  278. // HASH define foo bar
  279. // From having the # character end up at column 1, which makes it so it
  280. // is not handled as a #define next time through the preprocessor if in
  281. // -fpreprocessed mode.
  282. if (ColNo <= 1 && Tok.is(tok::hash))
  283. OS << ' ';
  284. // Otherwise, indent the appropriate number of spaces.
  285. for (; ColNo > 1; --ColNo)
  286. OS << ' ';
  287. return true;
  288. }
  289. void PrintPPOutputPPCallbacks::HandleNewlinesInToken(const char *TokStr,
  290. unsigned Len) {
  291. unsigned NumNewlines = 0;
  292. for (; Len; --Len, ++TokStr) {
  293. if (*TokStr != '\n' &&
  294. *TokStr != '\r')
  295. continue;
  296. ++NumNewlines;
  297. // If we have \n\r or \r\n, skip both and count as one line.
  298. if (Len != 1 &&
  299. (TokStr[1] == '\n' || TokStr[1] == '\r') &&
  300. TokStr[0] != TokStr[1])
  301. ++TokStr, --Len;
  302. }
  303. if (NumNewlines == 0) return;
  304. CurLine += NumNewlines;
  305. }
  306. namespace {
  307. struct UnknownPragmaHandler : public PragmaHandler {
  308. const char *Prefix;
  309. PrintPPOutputPPCallbacks *Callbacks;
  310. UnknownPragmaHandler(const char *prefix, PrintPPOutputPPCallbacks *callbacks)
  311. : PragmaHandler(0), Prefix(prefix), Callbacks(callbacks) {}
  312. virtual void HandlePragma(Preprocessor &PP, Token &PragmaTok) {
  313. // Figure out what line we went to and insert the appropriate number of
  314. // newline characters.
  315. Callbacks->MoveToLine(PragmaTok.getLocation());
  316. Callbacks->OS.write(Prefix, strlen(Prefix));
  317. // Read and print all of the pragma tokens.
  318. while (PragmaTok.isNot(tok::eom)) {
  319. if (PragmaTok.hasLeadingSpace())
  320. Callbacks->OS << ' ';
  321. std::string TokSpell = PP.getSpelling(PragmaTok);
  322. Callbacks->OS.write(&TokSpell[0], TokSpell.size());
  323. PP.LexUnexpandedToken(PragmaTok);
  324. }
  325. Callbacks->OS << '\n';
  326. }
  327. };
  328. } // end anonymous namespace
  329. static void PrintPreprocessedTokens(Preprocessor &PP, Token &Tok,
  330. PrintPPOutputPPCallbacks *Callbacks,
  331. llvm::raw_ostream &OS) {
  332. char Buffer[256];
  333. Token PrevPrevTok;
  334. Token PrevTok;
  335. PrevPrevTok.setKind(tok::unknown);
  336. PrevTok.setKind(tok::unknown);
  337. while (1) {
  338. // If this token is at the start of a line, emit newlines if needed.
  339. if (Tok.isAtStartOfLine() && Callbacks->HandleFirstTokOnLine(Tok)) {
  340. // done.
  341. } else if (Tok.hasLeadingSpace() ||
  342. // If we haven't emitted a token on this line yet, PrevTok isn't
  343. // useful to look at and no concatenation could happen anyway.
  344. (Callbacks->hasEmittedTokensOnThisLine() &&
  345. // Don't print "-" next to "-", it would form "--".
  346. Callbacks->AvoidConcat(PrevPrevTok, PrevTok, Tok))) {
  347. OS << ' ';
  348. }
  349. if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
  350. OS << II->getName();
  351. } else if (Tok.isLiteral() && !Tok.needsCleaning() &&
  352. Tok.getLiteralData()) {
  353. OS.write(Tok.getLiteralData(), Tok.getLength());
  354. } else if (Tok.getLength() < 256) {
  355. const char *TokPtr = Buffer;
  356. unsigned Len = PP.getSpelling(Tok, TokPtr);
  357. OS.write(TokPtr, Len);
  358. // Tokens that can contain embedded newlines need to adjust our current
  359. // line number.
  360. if (Tok.getKind() == tok::comment)
  361. Callbacks->HandleNewlinesInToken(TokPtr, Len);
  362. } else {
  363. std::string S = PP.getSpelling(Tok);
  364. OS.write(&S[0], S.size());
  365. // Tokens that can contain embedded newlines need to adjust our current
  366. // line number.
  367. if (Tok.getKind() == tok::comment)
  368. Callbacks->HandleNewlinesInToken(&S[0], S.size());
  369. }
  370. Callbacks->SetEmittedTokensOnThisLine();
  371. if (Tok.is(tok::eof)) break;
  372. PrevPrevTok = PrevTok;
  373. PrevTok = Tok;
  374. PP.Lex(Tok);
  375. }
  376. }
  377. typedef std::pair<IdentifierInfo*, MacroInfo*> id_macro_pair;
  378. static int MacroIDCompare(const void* a, const void* b) {
  379. const id_macro_pair *LHS = static_cast<const id_macro_pair*>(a);
  380. const id_macro_pair *RHS = static_cast<const id_macro_pair*>(b);
  381. return LHS->first->getName().compare(RHS->first->getName());
  382. }
  383. static void DoPrintMacros(Preprocessor &PP, llvm::raw_ostream *OS) {
  384. // Ignore unknown pragmas.
  385. PP.AddPragmaHandler(0, new EmptyPragmaHandler());
  386. // -dM mode just scans and ignores all tokens in the files, then dumps out
  387. // the macro table at the end.
  388. PP.EnterMainSourceFile();
  389. Token Tok;
  390. do PP.Lex(Tok);
  391. while (Tok.isNot(tok::eof));
  392. llvm::SmallVector<id_macro_pair, 128>
  393. MacrosByID(PP.macro_begin(), PP.macro_end());
  394. llvm::array_pod_sort(MacrosByID.begin(), MacrosByID.end(), MacroIDCompare);
  395. for (unsigned i = 0, e = MacrosByID.size(); i != e; ++i) {
  396. MacroInfo &MI = *MacrosByID[i].second;
  397. // Ignore computed macros like __LINE__ and friends.
  398. if (MI.isBuiltinMacro()) continue;
  399. PrintMacroDefinition(*MacrosByID[i].first, MI, PP, *OS);
  400. *OS << '\n';
  401. }
  402. }
  403. /// DoPrintPreprocessedInput - This implements -E mode.
  404. ///
  405. void clang::DoPrintPreprocessedInput(Preprocessor &PP, llvm::raw_ostream *OS,
  406. const PreprocessorOutputOptions &Opts) {
  407. // Show macros with no output is handled specially.
  408. if (!Opts.ShowCPP) {
  409. assert(Opts.ShowMacros && "Not yet implemented!");
  410. DoPrintMacros(PP, OS);
  411. return;
  412. }
  413. // Inform the preprocessor whether we want it to retain comments or not, due
  414. // to -C or -CC.
  415. PP.SetCommentRetentionState(Opts.ShowComments, Opts.ShowMacroComments);
  416. PrintPPOutputPPCallbacks *Callbacks =
  417. new PrintPPOutputPPCallbacks(PP, *OS, !Opts.ShowLineMarkers,
  418. Opts.ShowMacros);
  419. PP.AddPragmaHandler(0, new UnknownPragmaHandler("#pragma", Callbacks));
  420. PP.AddPragmaHandler("GCC", new UnknownPragmaHandler("#pragma GCC",
  421. Callbacks));
  422. PP.addPPCallbacks(Callbacks);
  423. // After we have configured the preprocessor, enter the main file.
  424. PP.EnterMainSourceFile();
  425. // Consume all of the tokens that come from the predefines buffer. Those
  426. // should not be emitted into the output and are guaranteed to be at the
  427. // start.
  428. const SourceManager &SourceMgr = PP.getSourceManager();
  429. Token Tok;
  430. do PP.Lex(Tok);
  431. while (Tok.isNot(tok::eof) && Tok.getLocation().isFileID() &&
  432. !strcmp(SourceMgr.getPresumedLoc(Tok.getLocation()).getFilename(),
  433. "<built-in>"));
  434. // Read all the preprocessed tokens, printing them out to the stream.
  435. PrintPreprocessedTokens(PP, Tok, Callbacks, *OS);
  436. *OS << '\n';
  437. }