MacroExpander.cpp 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636
  1. //===--- MacroExpander.cpp - Lex from a macro expansion -------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file was developed by Chris Lattner and is distributed under
  6. // the University of Illinois Open Source License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. //
  10. // This file implements the MacroExpander interface.
  11. //
  12. //===----------------------------------------------------------------------===//
  13. #include "clang/Lex/MacroExpander.h"
  14. #include "clang/Lex/MacroInfo.h"
  15. #include "clang/Lex/Preprocessor.h"
  16. #include "clang/Basic/SourceManager.h"
  17. #include "clang/Basic/Diagnostic.h"
  18. #include "llvm/ADT/SmallVector.h"
  19. using namespace clang;
  20. //===----------------------------------------------------------------------===//
  21. // MacroArgs Implementation
  22. //===----------------------------------------------------------------------===//
  23. /// MacroArgs ctor function - This destroys the vector passed in.
  24. MacroArgs *MacroArgs::create(const MacroInfo *MI,
  25. const LexerToken *UnexpArgTokens,
  26. unsigned NumToks, bool VarargsElided) {
  27. assert(MI->isFunctionLike() &&
  28. "Can't have args for an object-like macro!");
  29. // Allocate memory for the MacroArgs object with the lexer tokens at the end.
  30. MacroArgs *Result = (MacroArgs*)malloc(sizeof(MacroArgs) +
  31. NumToks*sizeof(LexerToken));
  32. // Construct the macroargs object.
  33. new (Result) MacroArgs(NumToks, VarargsElided);
  34. // Copy the actual unexpanded tokens to immediately after the result ptr.
  35. if (NumToks)
  36. memcpy(const_cast<LexerToken*>(Result->getUnexpArgument(0)),
  37. UnexpArgTokens, NumToks*sizeof(LexerToken));
  38. return Result;
  39. }
  40. /// destroy - Destroy and deallocate the memory for this object.
  41. ///
  42. void MacroArgs::destroy() {
  43. // Run the dtor to deallocate the vectors.
  44. this->~MacroArgs();
  45. // Release the memory for the object.
  46. free(this);
  47. }
  48. /// getArgLength - Given a pointer to an expanded or unexpanded argument,
  49. /// return the number of tokens, not counting the EOF, that make up the
  50. /// argument.
  51. unsigned MacroArgs::getArgLength(const LexerToken *ArgPtr) {
  52. unsigned NumArgTokens = 0;
  53. for (; ArgPtr->getKind() != tok::eof; ++ArgPtr)
  54. ++NumArgTokens;
  55. return NumArgTokens;
  56. }
  57. /// getUnexpArgument - Return the unexpanded tokens for the specified formal.
  58. ///
  59. const LexerToken *MacroArgs::getUnexpArgument(unsigned Arg) const {
  60. // The unexpanded argument tokens start immediately after the MacroArgs object
  61. // in memory.
  62. const LexerToken *Start = (const LexerToken *)(this+1);
  63. const LexerToken *Result = Start;
  64. // Scan to find Arg.
  65. for (; Arg; ++Result) {
  66. assert(Result < Start+NumUnexpArgTokens && "Invalid arg #");
  67. if (Result->getKind() == tok::eof)
  68. --Arg;
  69. }
  70. return Result;
  71. }
  72. /// ArgNeedsPreexpansion - If we can prove that the argument won't be affected
  73. /// by pre-expansion, return false. Otherwise, conservatively return true.
  74. bool MacroArgs::ArgNeedsPreexpansion(const LexerToken *ArgTok) const {
  75. // If there are no identifiers in the argument list, or if the identifiers are
  76. // known to not be macros, pre-expansion won't modify it.
  77. for (; ArgTok->getKind() != tok::eof; ++ArgTok)
  78. if (IdentifierInfo *II = ArgTok->getIdentifierInfo()) {
  79. if (II->getMacroInfo() && II->getMacroInfo()->isEnabled())
  80. // Return true even though the macro could be a function-like macro
  81. // without a following '(' token.
  82. return true;
  83. }
  84. return false;
  85. }
  86. /// getPreExpArgument - Return the pre-expanded form of the specified
  87. /// argument.
  88. const std::vector<LexerToken> &
  89. MacroArgs::getPreExpArgument(unsigned Arg, Preprocessor &PP) {
  90. assert(Arg < NumUnexpArgTokens && "Invalid argument number!");
  91. // If we have already computed this, return it.
  92. if (PreExpArgTokens.empty())
  93. PreExpArgTokens.resize(NumUnexpArgTokens);
  94. std::vector<LexerToken> &Result = PreExpArgTokens[Arg];
  95. if (!Result.empty()) return Result;
  96. const LexerToken *AT = getUnexpArgument(Arg);
  97. unsigned NumToks = getArgLength(AT)+1; // Include the EOF.
  98. // Otherwise, we have to pre-expand this argument, populating Result. To do
  99. // this, we set up a fake MacroExpander to lex from the unexpanded argument
  100. // list. With this installed, we lex expanded tokens until we hit the EOF
  101. // token at the end of the unexp list.
  102. PP.EnterTokenStream(AT, NumToks);
  103. // Lex all of the macro-expanded tokens into Result.
  104. do {
  105. Result.push_back(LexerToken());
  106. PP.Lex(Result.back());
  107. } while (Result.back().getKind() != tok::eof);
  108. // Pop the token stream off the top of the stack. We know that the internal
  109. // pointer inside of it is to the "end" of the token stream, but the stack
  110. // will not otherwise be popped until the next token is lexed. The problem is
  111. // that the token may be lexed sometime after the vector of tokens itself is
  112. // destroyed, which would be badness.
  113. PP.RemoveTopOfLexerStack();
  114. return Result;
  115. }
  116. /// StringifyArgument - Implement C99 6.10.3.2p2, converting a sequence of
  117. /// tokens into the literal string token that should be produced by the C #
  118. /// preprocessor operator.
  119. ///
  120. static LexerToken StringifyArgument(const LexerToken *ArgToks,
  121. Preprocessor &PP, bool Charify = false) {
  122. LexerToken Tok;
  123. Tok.startToken();
  124. Tok.setKind(tok::string_literal);
  125. const LexerToken *ArgTokStart = ArgToks;
  126. // Stringify all the tokens.
  127. std::string Result = "\"";
  128. // FIXME: Optimize this loop to not use std::strings.
  129. bool isFirst = true;
  130. for (; ArgToks->getKind() != tok::eof; ++ArgToks) {
  131. const LexerToken &Tok = *ArgToks;
  132. if (!isFirst && Tok.hasLeadingSpace())
  133. Result += ' ';
  134. isFirst = false;
  135. // If this is a string or character constant, escape the token as specified
  136. // by 6.10.3.2p2.
  137. if (Tok.getKind() == tok::string_literal || // "foo"
  138. Tok.getKind() == tok::wide_string_literal || // L"foo"
  139. Tok.getKind() == tok::char_constant) { // 'x' and L'x'.
  140. Result += Lexer::Stringify(PP.getSpelling(Tok));
  141. } else {
  142. // Otherwise, just append the token.
  143. Result += PP.getSpelling(Tok);
  144. }
  145. }
  146. // If the last character of the string is a \, and if it isn't escaped, this
  147. // is an invalid string literal, diagnose it as specified in C99.
  148. if (Result[Result.size()-1] == '\\') {
  149. // Count the number of consequtive \ characters. If even, then they are
  150. // just escaped backslashes, otherwise it's an error.
  151. unsigned FirstNonSlash = Result.size()-2;
  152. // Guaranteed to find the starting " if nothing else.
  153. while (Result[FirstNonSlash] == '\\')
  154. --FirstNonSlash;
  155. if ((Result.size()-1-FirstNonSlash) & 1) {
  156. // Diagnose errors for things like: #define F(X) #X / F(\)
  157. PP.Diag(ArgToks[-1], diag::pp_invalid_string_literal);
  158. Result.erase(Result.end()-1); // remove one of the \'s.
  159. }
  160. }
  161. Result += '"';
  162. // If this is the charify operation and the result is not a legal character
  163. // constant, diagnose it.
  164. if (Charify) {
  165. // First step, turn double quotes into single quotes:
  166. Result[0] = '\'';
  167. Result[Result.size()-1] = '\'';
  168. // Check for bogus character.
  169. bool isBad = false;
  170. if (Result.size() == 3) {
  171. isBad = Result[1] == '\''; // ''' is not legal. '\' already fixed above.
  172. } else {
  173. isBad = (Result.size() != 4 || Result[1] != '\\'); // Not '\x'
  174. }
  175. if (isBad) {
  176. PP.Diag(ArgTokStart[0], diag::err_invalid_character_to_charify);
  177. Result = "' '"; // Use something arbitrary, but legal.
  178. }
  179. }
  180. Tok.setLength(Result.size());
  181. Tok.setLocation(PP.CreateString(&Result[0], Result.size()));
  182. return Tok;
  183. }
  184. /// getStringifiedArgument - Compute, cache, and return the specified argument
  185. /// that has been 'stringified' as required by the # operator.
  186. const LexerToken &MacroArgs::getStringifiedArgument(unsigned ArgNo,
  187. Preprocessor &PP) {
  188. assert(ArgNo < NumUnexpArgTokens && "Invalid argument number!");
  189. if (StringifiedArgs.empty()) {
  190. StringifiedArgs.resize(getNumArguments());
  191. memset(&StringifiedArgs[0], 0,
  192. sizeof(StringifiedArgs[0])*getNumArguments());
  193. }
  194. if (StringifiedArgs[ArgNo].getKind() != tok::string_literal)
  195. StringifiedArgs[ArgNo] = StringifyArgument(getUnexpArgument(ArgNo), PP);
  196. return StringifiedArgs[ArgNo];
  197. }
  198. //===----------------------------------------------------------------------===//
  199. // MacroExpander Implementation
  200. //===----------------------------------------------------------------------===//
  201. /// Create a macro expander for the specified macro with the specified actual
  202. /// arguments. Note that this ctor takes ownership of the ActualArgs pointer.
  203. MacroExpander::MacroExpander(LexerToken &Tok, MacroArgs *Actuals,
  204. Preprocessor &pp)
  205. : Macro(Tok.getIdentifierInfo()->getMacroInfo()),
  206. ActualArgs(Actuals), PP(pp), CurToken(0),
  207. InstantiateLoc(Tok.getLocation()),
  208. AtStartOfLine(Tok.isAtStartOfLine()),
  209. HasLeadingSpace(Tok.hasLeadingSpace()) {
  210. MacroTokens = &Macro->getReplacementTokens()[0];
  211. NumMacroTokens = Macro->getReplacementTokens().size();
  212. // If this is a function-like macro, expand the arguments and change
  213. // MacroTokens to point to the expanded tokens.
  214. if (Macro->isFunctionLike() && Macro->getNumArgs())
  215. ExpandFunctionArguments();
  216. // Mark the macro as currently disabled, so that it is not recursively
  217. // expanded. The macro must be disabled only after argument pre-expansion of
  218. // function-like macro arguments occurs.
  219. Macro->DisableMacro();
  220. }
  221. /// Create a macro expander for the specified token stream. This does not
  222. /// take ownership of the specified token vector.
  223. MacroExpander::MacroExpander(const LexerToken *TokArray, unsigned NumToks,
  224. Preprocessor &pp)
  225. : Macro(0), ActualArgs(0), PP(pp), MacroTokens(TokArray),
  226. NumMacroTokens(NumToks), CurToken(0),
  227. InstantiateLoc(SourceLocation()), AtStartOfLine(false),
  228. HasLeadingSpace(false) {
  229. // Set HasLeadingSpace/AtStartOfLine so that the first token will be
  230. // returned unmodified.
  231. if (NumToks != 0) {
  232. AtStartOfLine = TokArray[0].isAtStartOfLine();
  233. HasLeadingSpace = TokArray[0].hasLeadingSpace();
  234. }
  235. }
  236. MacroExpander::~MacroExpander() {
  237. // If this was a function-like macro that actually uses its arguments, delete
  238. // the expanded tokens.
  239. if (Macro && MacroTokens != &Macro->getReplacementTokens()[0])
  240. delete [] MacroTokens;
  241. // MacroExpander owns its formal arguments.
  242. if (ActualArgs) ActualArgs->destroy();
  243. }
  244. /// Expand the arguments of a function-like macro so that we can quickly
  245. /// return preexpanded tokens from MacroTokens.
  246. void MacroExpander::ExpandFunctionArguments() {
  247. llvm::SmallVector<LexerToken, 128> ResultToks;
  248. // Loop through the MacroTokens tokens, expanding them into ResultToks. Keep
  249. // track of whether we change anything. If not, no need to keep them. If so,
  250. // we install the newly expanded sequence as MacroTokens.
  251. bool MadeChange = false;
  252. // NextTokGetsSpace - When this is true, the next token appended to the
  253. // output list will get a leading space, regardless of whether it had one to
  254. // begin with or not. This is used for placemarker support.
  255. bool NextTokGetsSpace = false;
  256. for (unsigned i = 0, e = NumMacroTokens; i != e; ++i) {
  257. // If we found the stringify operator, get the argument stringified. The
  258. // preprocessor already verified that the following token is a macro name
  259. // when the #define was parsed.
  260. const LexerToken &CurTok = MacroTokens[i];
  261. if (CurTok.getKind() == tok::hash || CurTok.getKind() == tok::hashat) {
  262. int ArgNo = Macro->getArgumentNum(MacroTokens[i+1].getIdentifierInfo());
  263. assert(ArgNo != -1 && "Token following # is not an argument?");
  264. LexerToken Res;
  265. if (CurTok.getKind() == tok::hash) // Stringify
  266. Res = ActualArgs->getStringifiedArgument(ArgNo, PP);
  267. else {
  268. // 'charify': don't bother caching these.
  269. Res = StringifyArgument(ActualArgs->getUnexpArgument(ArgNo), PP, true);
  270. }
  271. // The stringified/charified string leading space flag gets set to match
  272. // the #/#@ operator.
  273. if (CurTok.hasLeadingSpace() || NextTokGetsSpace)
  274. Res.setFlag(LexerToken::LeadingSpace);
  275. ResultToks.push_back(Res);
  276. MadeChange = true;
  277. ++i; // Skip arg name.
  278. NextTokGetsSpace = false;
  279. continue;
  280. }
  281. // Otherwise, if this is not an argument token, just add the token to the
  282. // output buffer.
  283. IdentifierInfo *II = CurTok.getIdentifierInfo();
  284. int ArgNo = II ? Macro->getArgumentNum(II) : -1;
  285. if (ArgNo == -1) {
  286. // This isn't an argument, just add it.
  287. ResultToks.push_back(CurTok);
  288. if (NextTokGetsSpace) {
  289. ResultToks.back().setFlag(LexerToken::LeadingSpace);
  290. NextTokGetsSpace = false;
  291. }
  292. continue;
  293. }
  294. // An argument is expanded somehow, the result is different than the
  295. // input.
  296. MadeChange = true;
  297. // Otherwise, this is a use of the argument. Find out if there is a paste
  298. // (##) operator before or after the argument.
  299. bool PasteBefore =
  300. !ResultToks.empty() && ResultToks.back().getKind() == tok::hashhash;
  301. bool PasteAfter = i+1 != e && MacroTokens[i+1].getKind() == tok::hashhash;
  302. // If it is not the LHS/RHS of a ## operator, we must pre-expand the
  303. // argument and substitute the expanded tokens into the result. This is
  304. // C99 6.10.3.1p1.
  305. if (!PasteBefore && !PasteAfter) {
  306. const LexerToken *ResultArgToks;
  307. // Only preexpand the argument if it could possibly need it. This
  308. // avoids some work in common cases.
  309. const LexerToken *ArgTok = ActualArgs->getUnexpArgument(ArgNo);
  310. if (ActualArgs->ArgNeedsPreexpansion(ArgTok))
  311. ResultArgToks = &ActualArgs->getPreExpArgument(ArgNo, PP)[0];
  312. else
  313. ResultArgToks = ArgTok; // Use non-preexpanded tokens.
  314. // If the arg token expanded into anything, append it.
  315. if (ResultArgToks->getKind() != tok::eof) {
  316. unsigned FirstResult = ResultToks.size();
  317. unsigned NumToks = MacroArgs::getArgLength(ResultArgToks);
  318. ResultToks.append(ResultArgToks, ResultArgToks+NumToks);
  319. // If any tokens were substituted from the argument, the whitespace
  320. // before the first token should match the whitespace of the arg
  321. // identifier.
  322. ResultToks[FirstResult].setFlagValue(LexerToken::LeadingSpace,
  323. CurTok.hasLeadingSpace() ||
  324. NextTokGetsSpace);
  325. NextTokGetsSpace = false;
  326. } else {
  327. // If this is an empty argument, and if there was whitespace before the
  328. // formal token, make sure the next token gets whitespace before it.
  329. NextTokGetsSpace = CurTok.hasLeadingSpace();
  330. }
  331. continue;
  332. }
  333. // Okay, we have a token that is either the LHS or RHS of a paste (##)
  334. // argument. It gets substituted as its non-pre-expanded tokens.
  335. const LexerToken *ArgToks = ActualArgs->getUnexpArgument(ArgNo);
  336. unsigned NumToks = MacroArgs::getArgLength(ArgToks);
  337. if (NumToks) { // Not an empty argument?
  338. ResultToks.append(ArgToks, ArgToks+NumToks);
  339. // If the next token was supposed to get leading whitespace, ensure it has
  340. // it now.
  341. if (NextTokGetsSpace) {
  342. ResultToks[ResultToks.size()-NumToks].setFlag(LexerToken::LeadingSpace);
  343. NextTokGetsSpace = false;
  344. }
  345. continue;
  346. }
  347. // If an empty argument is on the LHS or RHS of a paste, the standard (C99
  348. // 6.10.3.3p2,3) calls for a bunch of placemarker stuff to occur. We
  349. // implement this by eating ## operators when a LHS or RHS expands to
  350. // empty.
  351. NextTokGetsSpace |= CurTok.hasLeadingSpace();
  352. if (PasteAfter) {
  353. // Discard the argument token and skip (don't copy to the expansion
  354. // buffer) the paste operator after it.
  355. NextTokGetsSpace |= MacroTokens[i+1].hasLeadingSpace();
  356. ++i;
  357. continue;
  358. }
  359. // If this is on the RHS of a paste operator, we've already copied the
  360. // paste operator to the ResultToks list. Remove it.
  361. assert(PasteBefore && ResultToks.back().getKind() == tok::hashhash);
  362. NextTokGetsSpace |= ResultToks.back().hasLeadingSpace();
  363. ResultToks.pop_back();
  364. // If this is the __VA_ARGS__ token, and if the argument wasn't provided,
  365. // and if the macro had at least one real argument, and if the token before
  366. // the ## was a comma, remove the comma.
  367. if ((unsigned)ArgNo == Macro->getNumArgs()-1 && // is __VA_ARGS__
  368. ActualArgs->isVarargsElidedUse() && // Argument elided.
  369. !ResultToks.empty() && ResultToks.back().getKind() == tok::comma) {
  370. // Never add a space, even if the comma, ##, or arg had a space.
  371. NextTokGetsSpace = false;
  372. ResultToks.pop_back();
  373. }
  374. continue;
  375. }
  376. // If anything changed, install this as the new MacroTokens list.
  377. if (MadeChange) {
  378. // This is deleted in the dtor.
  379. NumMacroTokens = ResultToks.size();
  380. LexerToken *Res = new LexerToken[ResultToks.size()];
  381. if (NumMacroTokens)
  382. memcpy(Res, &ResultToks[0], NumMacroTokens*sizeof(LexerToken));
  383. MacroTokens = Res;
  384. }
  385. }
  386. /// Lex - Lex and return a token from this macro stream.
  387. ///
  388. void MacroExpander::Lex(LexerToken &Tok) {
  389. // Lexing off the end of the macro, pop this macro off the expansion stack.
  390. if (isAtEnd()) {
  391. // If this is a macro (not a token stream), mark the macro enabled now
  392. // that it is no longer being expanded.
  393. if (Macro) Macro->EnableMacro();
  394. // Pop this context off the preprocessors lexer stack and get the next
  395. // token. This will delete "this" so remember the PP instance var.
  396. Preprocessor &PPCache = PP;
  397. if (PP.HandleEndOfMacro(Tok))
  398. return;
  399. // HandleEndOfMacro may not return a token. If it doesn't, lex whatever is
  400. // next.
  401. return PPCache.Lex(Tok);
  402. }
  403. // If this is the first token of the expanded result, we inherit spacing
  404. // properties later.
  405. bool isFirstToken = CurToken == 0;
  406. // Get the next token to return.
  407. Tok = MacroTokens[CurToken++];
  408. // If this token is followed by a token paste (##) operator, paste the tokens!
  409. if (!isAtEnd() && MacroTokens[CurToken].getKind() == tok::hashhash)
  410. PasteTokens(Tok);
  411. // The token's current location indicate where the token was lexed from. We
  412. // need this information to compute the spelling of the token, but any
  413. // diagnostics for the expanded token should appear as if they came from
  414. // InstantiationLoc. Pull this information together into a new SourceLocation
  415. // that captures all of this.
  416. if (InstantiateLoc.isValid()) { // Don't do this for token streams.
  417. SourceManager &SrcMgr = PP.getSourceManager();
  418. // The token could have come from a prior macro expansion. In that case,
  419. // ignore the macro expand part to get to the physloc. This happens for
  420. // stuff like: #define A(X) X A(A(X)) A(1)
  421. SourceLocation PhysLoc = SrcMgr.getPhysicalLoc(Tok.getLocation());
  422. Tok.setLocation(SrcMgr.getInstantiationLoc(PhysLoc, InstantiateLoc));
  423. }
  424. // If this is the first token, set the lexical properties of the token to
  425. // match the lexical properties of the macro identifier.
  426. if (isFirstToken) {
  427. Tok.setFlagValue(LexerToken::StartOfLine , AtStartOfLine);
  428. Tok.setFlagValue(LexerToken::LeadingSpace, HasLeadingSpace);
  429. }
  430. // Handle recursive expansion!
  431. if (Tok.getIdentifierInfo())
  432. return PP.HandleIdentifier(Tok);
  433. // Otherwise, return a normal token.
  434. }
  435. /// PasteTokens - Tok is the LHS of a ## operator, and CurToken is the ##
  436. /// operator. Read the ## and RHS, and paste the LHS/RHS together. If there
  437. /// are is another ## after it, chomp it iteratively. Return the result as Tok.
  438. void MacroExpander::PasteTokens(LexerToken &Tok) {
  439. llvm::SmallVector<char, 128> Buffer;
  440. do {
  441. // Consume the ## operator.
  442. SourceLocation PasteOpLoc = MacroTokens[CurToken].getLocation();
  443. ++CurToken;
  444. assert(!isAtEnd() && "No token on the RHS of a paste operator!");
  445. // Get the RHS token.
  446. const LexerToken &RHS = MacroTokens[CurToken];
  447. bool isInvalid = false;
  448. // Allocate space for the result token. This is guaranteed to be enough for
  449. // the two tokens and a null terminator.
  450. Buffer.resize(Tok.getLength() + RHS.getLength() + 1);
  451. // Get the spelling of the LHS token in Buffer.
  452. const char *BufPtr = &Buffer[0];
  453. unsigned LHSLen = PP.getSpelling(Tok, BufPtr);
  454. if (BufPtr != &Buffer[0]) // Really, we want the chars in Buffer!
  455. memcpy(&Buffer[0], BufPtr, LHSLen);
  456. BufPtr = &Buffer[LHSLen];
  457. unsigned RHSLen = PP.getSpelling(RHS, BufPtr);
  458. if (BufPtr != &Buffer[LHSLen]) // Really, we want the chars in Buffer!
  459. memcpy(&Buffer[LHSLen], BufPtr, RHSLen);
  460. // Add null terminator.
  461. Buffer[LHSLen+RHSLen] = '\0';
  462. // Trim excess space.
  463. Buffer.resize(LHSLen+RHSLen+1);
  464. // Plop the pasted result (including the trailing newline and null) into a
  465. // scratch buffer where we can lex it.
  466. SourceLocation ResultTokLoc = PP.CreateString(&Buffer[0], Buffer.size());
  467. // Lex the resultant pasted token into Result.
  468. LexerToken Result;
  469. // Avoid testing /*, as the lexer would think it is the start of a comment
  470. // and emit an error that it is unterminated.
  471. if (Tok.getKind() == tok::slash && RHS.getKind() == tok::star) {
  472. isInvalid = true;
  473. } else if (Tok.getKind() == tok::identifier &&
  474. RHS.getKind() == tok::identifier) {
  475. // Common paste case: identifier+identifier = identifier. Avoid creating
  476. // a lexer and other overhead.
  477. PP.IncrementPasteCounter(true);
  478. Result.startToken();
  479. Result.setKind(tok::identifier);
  480. Result.setLocation(ResultTokLoc);
  481. Result.setLength(LHSLen+RHSLen);
  482. } else {
  483. PP.IncrementPasteCounter(false);
  484. // Make a lexer to lex this string from.
  485. SourceManager &SourceMgr = PP.getSourceManager();
  486. const char *ResultStrData = SourceMgr.getCharacterData(ResultTokLoc);
  487. unsigned FileID = ResultTokLoc.getFileID();
  488. assert(FileID && "Could not get FileID for paste?");
  489. // Make a lexer object so that we lex and expand the paste result.
  490. Lexer *TL = new Lexer(SourceMgr.getBuffer(FileID), FileID, PP,
  491. ResultStrData,
  492. ResultStrData+LHSLen+RHSLen /*don't include null*/);
  493. // Lex a token in raw mode. This way it won't look up identifiers
  494. // automatically, lexing off the end will return an eof token, and
  495. // warnings are disabled. This returns true if the result token is the
  496. // entire buffer.
  497. bool IsComplete = TL->LexRawToken(Result);
  498. // If we got an EOF token, we didn't form even ONE token. For example, we
  499. // did "/ ## /" to get "//".
  500. IsComplete &= Result.getKind() != tok::eof;
  501. isInvalid = !IsComplete;
  502. // We're now done with the temporary lexer.
  503. delete TL;
  504. }
  505. // If pasting the two tokens didn't form a full new token, this is an error.
  506. // This occurs with "x ## +" and other stuff. Return with Tok unmodified
  507. // and with RHS as the next token to lex.
  508. if (isInvalid) {
  509. // If not in assembler language mode.
  510. PP.Diag(PasteOpLoc, diag::err_pp_bad_paste,
  511. std::string(Buffer.begin(), Buffer.end()-1));
  512. return;
  513. }
  514. // Turn ## into 'other' to avoid # ## # from looking like a paste operator.
  515. if (Result.getKind() == tok::hashhash)
  516. Result.setKind(tok::unknown);
  517. // FIXME: Turn __VARRGS__ into "not a token"?
  518. // Transfer properties of the LHS over the the Result.
  519. Result.setFlagValue(LexerToken::StartOfLine , Tok.isAtStartOfLine());
  520. Result.setFlagValue(LexerToken::LeadingSpace, Tok.hasLeadingSpace());
  521. // Finally, replace LHS with the result, consume the RHS, and iterate.
  522. ++CurToken;
  523. Tok = Result;
  524. } while (!isAtEnd() && MacroTokens[CurToken].getKind() == tok::hashhash);
  525. // Now that we got the result token, it will be subject to expansion. Since
  526. // token pasting re-lexes the result token in raw mode, identifier information
  527. // isn't looked up. As such, if the result is an identifier, look up id info.
  528. if (Tok.getKind() == tok::identifier) {
  529. // Look up the identifier info for the token. We disabled identifier lookup
  530. // by saying we're skipping contents, so we need to do this manually.
  531. Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok));
  532. }
  533. }
  534. /// isNextTokenLParen - If the next token lexed will pop this macro off the
  535. /// expansion stack, return 2. If the next unexpanded token is a '(', return
  536. /// 1, otherwise return 0.
  537. unsigned MacroExpander::isNextTokenLParen() const {
  538. // Out of tokens?
  539. if (isAtEnd())
  540. return 2;
  541. return MacroTokens[CurToken].getKind() == tok::l_paren;
  542. }