TokensTest.cpp 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781
  1. //===- TokensTest.cpp -----------------------------------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "clang/Tooling/Syntax/Tokens.h"
  9. #include "clang/AST/ASTConsumer.h"
  10. #include "clang/AST/Expr.h"
  11. #include "clang/Basic/Diagnostic.h"
  12. #include "clang/Basic/DiagnosticIDs.h"
  13. #include "clang/Basic/DiagnosticOptions.h"
  14. #include "clang/Basic/FileManager.h"
  15. #include "clang/Basic/FileSystemOptions.h"
  16. #include "clang/Basic/LLVM.h"
  17. #include "clang/Basic/LangOptions.h"
  18. #include "clang/Basic/SourceLocation.h"
  19. #include "clang/Basic/SourceManager.h"
  20. #include "clang/Basic/TokenKinds.def"
  21. #include "clang/Basic/TokenKinds.h"
  22. #include "clang/Frontend/CompilerInstance.h"
  23. #include "clang/Frontend/FrontendAction.h"
  24. #include "clang/Frontend/Utils.h"
  25. #include "clang/Lex/Lexer.h"
  26. #include "clang/Lex/PreprocessorOptions.h"
  27. #include "clang/Lex/Token.h"
  28. #include "clang/Tooling/Tooling.h"
  29. #include "llvm/ADT/ArrayRef.h"
  30. #include "llvm/ADT/IntrusiveRefCntPtr.h"
  31. #include "llvm/ADT/None.h"
  32. #include "llvm/ADT/Optional.h"
  33. #include "llvm/ADT/STLExtras.h"
  34. #include "llvm/ADT/StringRef.h"
  35. #include "llvm/Support/FormatVariadic.h"
  36. #include "llvm/Support/MemoryBuffer.h"
  37. #include "llvm/Support/VirtualFileSystem.h"
  38. #include "llvm/Support/raw_os_ostream.h"
  39. #include "llvm/Support/raw_ostream.h"
  40. #include "llvm/Testing/Support/Annotations.h"
  41. #include "llvm/Testing/Support/SupportHelpers.h"
  42. #include <cassert>
  43. #include <cstdlib>
  44. #include <gmock/gmock.h>
  45. #include <gtest/gtest.h>
  46. #include <memory>
  47. #include <ostream>
  48. #include <string>
  49. using namespace clang;
  50. using namespace clang::syntax;
  51. using llvm::ValueIs;
  52. using ::testing::AllOf;
  53. using ::testing::Contains;
  54. using ::testing::ElementsAre;
  55. using ::testing::Field;
  56. using ::testing::Matcher;
  57. using ::testing::Not;
  58. using ::testing::StartsWith;
  59. namespace {
  60. // Checks the passed ArrayRef<T> has the same begin() and end() iterators as the
  61. // argument.
  62. MATCHER_P(SameRange, A, "") {
  63. return A.begin() == arg.begin() && A.end() == arg.end();
  64. }
  65. Matcher<TokenBuffer::Expansion>
  66. IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled,
  67. Matcher<llvm::ArrayRef<syntax::Token>> Expanded) {
  68. return AllOf(Field(&TokenBuffer::Expansion::Spelled, Spelled),
  69. Field(&TokenBuffer::Expansion::Expanded, Expanded));
  70. }
  71. // Matchers for syntax::Token.
  72. MATCHER_P(Kind, K, "") { return arg.kind() == K; }
  73. MATCHER_P2(HasText, Text, SourceMgr, "") {
  74. return arg.text(*SourceMgr) == Text;
  75. }
  76. /// Checks the start and end location of a token are equal to SourceRng.
  77. MATCHER_P(RangeIs, SourceRng, "") {
  78. return arg.location() == SourceRng.first &&
  79. arg.endLocation() == SourceRng.second;
  80. }
  81. class TokenCollectorTest : public ::testing::Test {
  82. public:
  83. /// Run the clang frontend, collect the preprocessed tokens from the frontend
  84. /// invocation and store them in this->Buffer.
  85. /// This also clears SourceManager before running the compiler.
  86. void recordTokens(llvm::StringRef Code) {
  87. class RecordTokens : public ASTFrontendAction {
  88. public:
  89. explicit RecordTokens(TokenBuffer &Result) : Result(Result) {}
  90. bool BeginSourceFileAction(CompilerInstance &CI) override {
  91. assert(!Collector && "expected only a single call to BeginSourceFile");
  92. Collector.emplace(CI.getPreprocessor());
  93. return true;
  94. }
  95. void EndSourceFileAction() override {
  96. assert(Collector && "BeginSourceFileAction was never called");
  97. Result = std::move(*Collector).consume();
  98. }
  99. std::unique_ptr<ASTConsumer>
  100. CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override {
  101. return std::make_unique<ASTConsumer>();
  102. }
  103. private:
  104. TokenBuffer &Result;
  105. llvm::Optional<TokenCollector> Collector;
  106. };
  107. constexpr const char *FileName = "./input.cpp";
  108. FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy(""));
  109. // Prepare to run a compiler.
  110. if (!Diags->getClient())
  111. Diags->setClient(new IgnoringDiagConsumer);
  112. std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only",
  113. FileName};
  114. auto CI = createInvocationFromCommandLine(Args, Diags, FS);
  115. assert(CI);
  116. CI->getFrontendOpts().DisableFree = false;
  117. CI->getPreprocessorOpts().addRemappedFile(
  118. FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release());
  119. CompilerInstance Compiler;
  120. Compiler.setInvocation(std::move(CI));
  121. Compiler.setDiagnostics(Diags.get());
  122. Compiler.setFileManager(FileMgr.get());
  123. Compiler.setSourceManager(SourceMgr.get());
  124. this->Buffer = TokenBuffer(*SourceMgr);
  125. RecordTokens Recorder(this->Buffer);
  126. ASSERT_TRUE(Compiler.ExecuteAction(Recorder))
  127. << "failed to run the frontend";
  128. }
  129. /// Record the tokens and return a test dump of the resulting buffer.
  130. std::string collectAndDump(llvm::StringRef Code) {
  131. recordTokens(Code);
  132. return Buffer.dumpForTests();
  133. }
  134. // Adds a file to the test VFS.
  135. void addFile(llvm::StringRef Path, llvm::StringRef Contents) {
  136. if (!FS->addFile(Path, time_t(),
  137. llvm::MemoryBuffer::getMemBufferCopy(Contents))) {
  138. ADD_FAILURE() << "could not add a file to VFS: " << Path;
  139. }
  140. }
  141. /// Add a new file, run syntax::tokenize() on it and return the results.
  142. std::vector<syntax::Token> tokenize(llvm::StringRef Text) {
  143. // FIXME: pass proper LangOptions.
  144. return syntax::tokenize(
  145. SourceMgr->createFileID(llvm::MemoryBuffer::getMemBufferCopy(Text)),
  146. *SourceMgr, LangOptions());
  147. }
  148. // Specialized versions of matchers that hide the SourceManager from clients.
  149. Matcher<syntax::Token> HasText(std::string Text) const {
  150. return ::HasText(Text, SourceMgr.get());
  151. }
  152. Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const {
  153. std::pair<SourceLocation, SourceLocation> Ls;
  154. Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
  155. .getLocWithOffset(R.Begin);
  156. Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
  157. .getLocWithOffset(R.End);
  158. return ::RangeIs(Ls);
  159. }
  160. /// Finds a subrange in O(n * m).
  161. template <class T, class U, class Eq>
  162. llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange,
  163. llvm::ArrayRef<T> Range, Eq F) {
  164. for (auto Begin = Range.begin(); Begin < Range.end(); ++Begin) {
  165. auto It = Begin;
  166. for (auto ItSub = Subrange.begin();
  167. ItSub != Subrange.end() && It != Range.end(); ++ItSub, ++It) {
  168. if (!F(*ItSub, *It))
  169. goto continue_outer;
  170. }
  171. return llvm::makeArrayRef(Begin, It);
  172. continue_outer:;
  173. }
  174. return llvm::makeArrayRef(Range.end(), Range.end());
  175. }
  176. /// Finds a subrange in \p Tokens that match the tokens specified in \p Query.
  177. /// The match should be unique. \p Query is a whitespace-separated list of
  178. /// tokens to search for.
  179. llvm::ArrayRef<syntax::Token>
  180. findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) {
  181. llvm::SmallVector<llvm::StringRef, 8> QueryTokens;
  182. Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
  183. if (QueryTokens.empty()) {
  184. ADD_FAILURE() << "will not look for an empty list of tokens";
  185. std::abort();
  186. }
  187. // An equality test for search.
  188. auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) {
  189. return Q == T.text(*SourceMgr);
  190. };
  191. // Find a match.
  192. auto Found =
  193. findSubrange(llvm::makeArrayRef(QueryTokens), Tokens, TextMatches);
  194. if (Found.begin() == Tokens.end()) {
  195. ADD_FAILURE() << "could not find the subrange for " << Query;
  196. std::abort();
  197. }
  198. // Check that the match is unique.
  199. if (findSubrange(llvm::makeArrayRef(QueryTokens),
  200. llvm::makeArrayRef(Found.end(), Tokens.end()), TextMatches)
  201. .begin() != Tokens.end()) {
  202. ADD_FAILURE() << "match is not unique for " << Query;
  203. std::abort();
  204. }
  205. return Found;
  206. };
  207. // Specialized versions of findTokenRange for expanded and spelled tokens.
  208. llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) {
  209. return findTokenRange(Query, Buffer.expandedTokens());
  210. }
  211. llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query,
  212. FileID File = FileID()) {
  213. if (!File.isValid())
  214. File = SourceMgr->getMainFileID();
  215. return findTokenRange(Query, Buffer.spelledTokens(File));
  216. }
  217. // Data fields.
  218. llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
  219. new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions);
  220. IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
  221. new llvm::vfs::InMemoryFileSystem;
  222. llvm::IntrusiveRefCntPtr<FileManager> FileMgr =
  223. new FileManager(FileSystemOptions(), FS);
  224. llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr =
  225. new SourceManager(*Diags, *FileMgr);
  226. /// Contains last result of calling recordTokens().
  227. TokenBuffer Buffer = TokenBuffer(*SourceMgr);
  228. };
  229. TEST_F(TokenCollectorTest, RawMode) {
  230. EXPECT_THAT(tokenize("int main() {}"),
  231. ElementsAre(Kind(tok::kw_int),
  232. AllOf(HasText("main"), Kind(tok::identifier)),
  233. Kind(tok::l_paren), Kind(tok::r_paren),
  234. Kind(tok::l_brace), Kind(tok::r_brace)));
  235. // Comments are ignored for now.
  236. EXPECT_THAT(tokenize("/* foo */int a; // more comments"),
  237. ElementsAre(Kind(tok::kw_int),
  238. AllOf(HasText("a"), Kind(tok::identifier)),
  239. Kind(tok::semi)));
  240. }
  // Table-driven test: each case pairs an input source with the complete dump
  // that collectAndDump() is expected to return for it.
  // The expected dumps are compared with EXPECT_EQ on std::string, so every
  // character inside the raw string literals below, whitespace included, is
  // significant.
  241. TEST_F(TokenCollectorTest, Basic) {
  242. std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
  243. {"int main() {}",
  244. R"(expanded tokens:
  245. int main ( ) { }
  246. file './input.cpp'
  247. spelled tokens:
  248. int main ( ) { }
  249. no mappings.
  250. )"},
  251. // All kinds of whitespace are ignored.
  252. {"\t\n int\t\n main\t\n (\t\n )\t\n{\t\n }\t\n",
  253. R"(expanded tokens:
  254. int main ( ) { }
  255. file './input.cpp'
  256. spelled tokens:
  257. int main ( ) { }
  258. no mappings.
  259. )"},
  260. // Annotation tokens are ignored.
  261. {R"cpp(
  262. #pragma GCC visibility push (public)
  263. #pragma GCC visibility pop
  264. )cpp",
  265. R"(expanded tokens:
  266. <empty>
  267. file './input.cpp'
  268. spelled tokens:
  269. # pragma GCC visibility push ( public ) # pragma GCC visibility pop
  270. mappings:
  271. ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0)
  272. )"},
  273. // Empty files should not crash.
  274. {R"cpp()cpp", R"(expanded tokens:
  275. <empty>
  276. file './input.cpp'
  277. spelled tokens:
  278. <empty>
  279. no mappings.
  280. )"},
  281. // Should not crash on errors inside '#define' directives. Error is that
  282. // stringification (#B) does not refer to a macro parameter.
  283. {
  284. R"cpp(
  285. a
  286. #define MACRO() A #B
  287. )cpp",
  288. R"(expanded tokens:
  289. a
  290. file './input.cpp'
  291. spelled tokens:
  292. a # define MACRO ( ) A # B
  293. mappings:
  294. ['#'_1, '<eof>'_9) => ['<eof>'_1, '<eof>'_1)
  295. )"}};
  // Run every case; the actual dump is additionally streamed into the failure
  // message.
  296. for (auto &Test : TestCases)
  297. EXPECT_EQ(collectAndDump(Test.first), Test.second)
  298. << collectAndDump(Test.first);
  299. }
  300. TEST_F(TokenCollectorTest, Locations) {
  301. // Check locations of the tokens.
  302. llvm::Annotations Code(R"cpp(
  303. $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]]
  304. )cpp");
  305. recordTokens(Code.code());
  306. // Check expanded tokens.
  307. EXPECT_THAT(
  308. Buffer.expandedTokens(),
  309. ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
  310. AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
  311. AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
  312. AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
  313. AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))),
  314. Kind(tok::eof)));
  315. // Check spelled tokens.
  316. EXPECT_THAT(
  317. Buffer.spelledTokens(SourceMgr->getMainFileID()),
  318. ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
  319. AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
  320. AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
  321. AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
  322. AllOf(Kind(tok::semi), RangeIs(Code.range("r5")))));
  323. }
  324. TEST_F(TokenCollectorTest, MacroDirectives) {
  325. // Macro directives are not stored anywhere at the moment.
  326. std::string Code = R"cpp(
  327. #define FOO a
  328. #include "unresolved_file.h"
  329. #undef FOO
  330. #ifdef X
  331. #else
  332. #endif
  333. #ifndef Y
  334. #endif
  335. #if 1
  336. #elif 2
  337. #else
  338. #endif
  339. #pragma once
  340. #pragma something lalala
  341. int a;
  342. )cpp";
  343. std::string Expected =
  344. "expanded tokens:\n"
  345. " int a ;\n"
  346. "file './input.cpp'\n"
  347. " spelled tokens:\n"
  348. " # define FOO a # include \"unresolved_file.h\" # undef FOO "
  349. "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else "
  350. "# endif # pragma once # pragma something lalala int a ;\n"
  351. " mappings:\n"
  352. " ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n";
  353. EXPECT_EQ(collectAndDump(Code), Expected);
  354. }
  // Table-driven test for macro expansion: each case pairs an input that uses
  // macros with the complete dump expected from collectAndDump(), i.e. the
  // expanded tokens, the spelled tokens of the file and the listed mappings.
  // The expected dumps are compared with EXPECT_EQ on std::string, so every
  // character inside the literals below, whitespace included, is significant.
  355. TEST_F(TokenCollectorTest, MacroReplacements) {
  356. std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
  357. // A simple object-like macro.
  358. {R"cpp(
  359. #define INT int const
  360. INT a;
  361. )cpp",
  362. R"(expanded tokens:
  363. int const a ;
  364. file './input.cpp'
  365. spelled tokens:
  366. # define INT int const INT a ;
  367. mappings:
  368. ['#'_0, 'INT'_5) => ['int'_0, 'int'_0)
  369. ['INT'_5, 'a'_6) => ['int'_0, 'a'_2)
  370. )"},
  371. // A simple function-like macro.
  372. {R"cpp(
  373. #define INT(a) const int
  374. INT(10+10) a;
  375. )cpp",
  376. R"(expanded tokens:
  377. const int a ;
  378. file './input.cpp'
  379. spelled tokens:
  380. # define INT ( a ) const int INT ( 10 + 10 ) a ;
  381. mappings:
  382. ['#'_0, 'INT'_8) => ['const'_0, 'const'_0)
  383. ['INT'_8, 'a'_14) => ['const'_0, 'a'_2)
  384. )"},
  385. // Recursive macro replacements.
  386. {R"cpp(
  387. #define ID(X) X
  388. #define INT int const
  389. ID(ID(INT)) a;
  390. )cpp",
  391. R"(expanded tokens:
  392. int const a ;
  393. file './input.cpp'
  394. spelled tokens:
  395. # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ;
  396. mappings:
  397. ['#'_0, 'ID'_12) => ['int'_0, 'int'_0)
  398. ['ID'_12, 'a'_19) => ['int'_0, 'a'_2)
  399. )"},
  400. // A little more complicated recursive macro replacements.
  401. {R"cpp(
  402. #define ADD(X, Y) X+Y
  403. #define MULT(X, Y) X*Y
  404. int a = ADD(MULT(1,2), MULT(3,ADD(4,5)));
  405. )cpp",
  406. "expanded tokens:\n"
  407. " int a = 1 * 2 + 3 * 4 + 5 ;\n"
  408. "file './input.cpp'\n"
  409. " spelled tokens:\n"
  410. " # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int "
  411. "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n"
  412. " mappings:\n"
  413. " ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n"
  414. " ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"},
  415. // Empty macro replacement.
  416. // FIXME: the #define directives should not be glued together.
  417. {R"cpp(
  418. #define EMPTY
  419. #define EMPTY_FUNC(X)
  420. EMPTY
  421. EMPTY_FUNC(1+2+3)
  422. )cpp",
  423. R"(expanded tokens:
  424. <empty>
  425. file './input.cpp'
  426. spelled tokens:
  427. # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 )
  428. mappings:
  429. ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0)
  430. ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0)
  431. ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0)
  432. )"},
  433. // File ends with a macro replacement.
  434. {R"cpp(
  435. #define FOO 10+10;
  436. int a = FOO
  437. )cpp",
  438. R"(expanded tokens:
  439. int a = 10 + 10 ;
  440. file './input.cpp'
  441. spelled tokens:
  442. # define FOO 10 + 10 ; int a = FOO
  443. mappings:
  444. ['#'_0, 'int'_7) => ['int'_0, 'int'_0)
  445. ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7)
  446. )"}};
  // Run every case; the actual dump is additionally streamed into the failure
  // message.
  447. for (auto &Test : TestCases)
  448. EXPECT_EQ(Test.second, collectAndDump(Test.first))
  449. << collectAndDump(Test.first);
  450. }
  451. TEST_F(TokenCollectorTest, SpecialTokens) {
  452. // Tokens coming from concatenations.
  453. recordTokens(R"cpp(
  454. #define CONCAT(a, b) a ## b
  455. int a = CONCAT(1, 2);
  456. )cpp");
  457. EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
  458. Contains(HasText("12")));
  459. // Multi-line tokens with slashes at the end.
  460. recordTokens("i\\\nn\\\nt");
  461. EXPECT_THAT(Buffer.expandedTokens(),
  462. ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")),
  463. Kind(tok::eof)));
  464. // FIXME: test tokens with digraphs and UCN identifiers.
  465. }
  466. TEST_F(TokenCollectorTest, LateBoundTokens) {
  467. // The parser eventually breaks the first '>>' into two tokens ('>' and '>'),
  468. // but we choose to record them as a single token (for now).
  469. llvm::Annotations Code(R"cpp(
  470. template <class T>
  471. struct foo { int a; };
  472. int bar = foo<foo<int$br[[>>]]().a;
  473. int baz = 10 $op[[>>]] 2;
  474. )cpp");
  475. recordTokens(Code.code());
  476. EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
  477. AllOf(Contains(AllOf(Kind(tok::greatergreater),
  478. RangeIs(Code.range("br")))),
  479. Contains(AllOf(Kind(tok::greatergreater),
  480. RangeIs(Code.range("op"))))));
  481. }
  482. TEST_F(TokenCollectorTest, DelayedParsing) {
  483. llvm::StringLiteral Code = R"cpp(
  484. struct Foo {
  485. int method() {
  486. // Parser will visit method bodies and initializers multiple times, but
  487. // TokenBuffer should only record the first walk over the tokens;
  488. return 100;
  489. }
  490. int a = 10;
  491. struct Subclass {
  492. void foo() {
  493. Foo().method();
  494. }
  495. };
  496. };
  497. )cpp";
  498. std::string ExpectedTokens =
  499. "expanded tokens:\n"
  500. " struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct "
  501. "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n";
  502. EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens));
  503. }
  // Registers two headers in the test VFS (foo.h, which includes bar.h),
  // includes foo.h from the main file, and compares the complete dump, which
  // lists spelled tokens and mappings separately for each of the three files.
  // The expected dump is compared with EXPECT_EQ on std::string, so every
  // character inside the raw string literal, whitespace included, is
  // significant.
  504. TEST_F(TokenCollectorTest, MultiFile) {
  505. addFile("./foo.h", R"cpp(
  506. #define ADD(X, Y) X+Y
  507. int a = 100;
  508. #include "bar.h"
  509. )cpp");
  510. addFile("./bar.h", R"cpp(
  511. int b = ADD(1, 2);
  512. #define MULT(X, Y) X*Y
  513. )cpp");
  514. llvm::StringLiteral Code = R"cpp(
  515. #include "foo.h"
  516. int c = ADD(1, MULT(2,3));
  517. )cpp";
  518. std::string Expected = R"(expanded tokens:
  519. int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ;
  520. file './input.cpp'
  521. spelled tokens:
  522. # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ;
  523. mappings:
  524. ['#'_0, 'int'_3) => ['int'_12, 'int'_12)
  525. ['ADD'_6, ';'_17) => ['1'_15, ';'_20)
  526. file './foo.h'
  527. spelled tokens:
  528. # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h"
  529. mappings:
  530. ['#'_0, 'int'_11) => ['int'_0, 'int'_0)
  531. ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5)
  532. file './bar.h'
  533. spelled tokens:
  534. int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y
  535. mappings:
  536. ['ADD'_3, ';'_9) => ['1'_8, ';'_11)
  537. ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12)
  538. )";
  // On failure the input and the actual dump are streamed into the message.
  539. EXPECT_EQ(Expected, collectAndDump(Code))
  540. << "input: " << Code << "\nresults: " << collectAndDump(Code);
  541. }
  542. class TokenBufferTest : public TokenCollectorTest {};
  543. TEST_F(TokenBufferTest, SpelledByExpanded) {
  544. recordTokens(R"cpp(
  545. a1 a2 a3 b1 b2
  546. )cpp");
  547. // Sanity check: expanded and spelled tokens are stored separately.
  548. EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
  549. // Searching for subranges of expanded tokens should give the corresponding
  550. // spelled ones.
  551. EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")),
  552. ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2"))));
  553. EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
  554. ValueIs(SameRange(findSpelled("a1 a2 a3"))));
  555. EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
  556. ValueIs(SameRange(findSpelled("b1 b2"))));
  557. // Test search on simple macro expansions.
  558. recordTokens(R"cpp(
  559. #define A a1 a2 a3
  560. #define B b1 b2
  561. A split B
  562. )cpp");
  563. EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
  564. ValueIs(SameRange(findSpelled("A split B"))));
  565. EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
  566. ValueIs(SameRange(findSpelled("A split").drop_back())));
  567. EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
  568. ValueIs(SameRange(findSpelled("split B").drop_front())));
  569. // Ranges not fully covering macro invocations should fail.
  570. EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
  571. EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), llvm::None);
  572. EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")),
  573. llvm::None);
  574. // Recursive macro invocations.
  575. recordTokens(R"cpp(
  576. #define ID(x) x
  577. #define B b1 b2
  578. ID(ID(ID(a1) a2 a3)) split ID(B)
  579. )cpp");
  580. EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
  581. ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )"))));
  582. EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
  583. ValueIs(SameRange(findSpelled("ID ( B )"))));
  584. EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
  585. ValueIs(SameRange(findSpelled(
  586. "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )"))));
  587. // Ranges crossing macro call boundaries.
  588. EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1")),
  589. llvm::None);
  590. EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1")),
  591. llvm::None);
  592. // FIXME: next two examples should map to macro arguments, but currently they
  593. // fail.
  594. EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2")), llvm::None);
  595. EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), llvm::None);
  596. // Empty macro expansions.
  597. recordTokens(R"cpp(
  598. #define EMPTY
  599. #define ID(X) X
  600. EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
  601. EMPTY EMPTY ID(4 5 6) split2
  602. ID(7 8 9) EMPTY EMPTY
  603. )cpp");
  604. EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")),
  605. ValueIs(SameRange(findSpelled("ID ( 1 2 3 )"))));
  606. EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")),
  607. ValueIs(SameRange(findSpelled("ID ( 4 5 6 )"))));
  608. EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")),
  609. ValueIs(SameRange(findSpelled("ID ( 7 8 9 )"))));
  610. // Empty mappings coming from various directives.
  611. recordTokens(R"cpp(
  612. #define ID(X) X
  613. ID(1)
  614. #pragma lalala
  615. not_mapped
  616. )cpp");
  617. EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")),
  618. ValueIs(SameRange(findSpelled("not_mapped"))));
  619. }
  620. TEST_F(TokenBufferTest, ExpansionStartingAt) {
  621. // Object-like macro expansions.
  622. recordTokens(R"cpp(
  623. #define FOO 3+4
  624. int a = FOO 1;
  625. int b = FOO 2;
  626. )cpp");
  627. llvm::ArrayRef<syntax::Token> Foo1 = findSpelled("FOO 1").drop_back();
  628. EXPECT_THAT(
  629. Buffer.expansionStartingAt(Foo1.data()),
  630. ValueIs(IsExpansion(SameRange(Foo1),
  631. SameRange(findExpanded("3 + 4 1").drop_back()))));
  632. llvm::ArrayRef<syntax::Token> Foo2 = findSpelled("FOO 2").drop_back();
  633. EXPECT_THAT(
  634. Buffer.expansionStartingAt(Foo2.data()),
  635. ValueIs(IsExpansion(SameRange(Foo2),
  636. SameRange(findExpanded("3 + 4 2").drop_back()))));
  637. // Function-like macro expansions.
  638. recordTokens(R"cpp(
  639. #define ID(X) X
  640. int a = ID(1+2+3);
  641. int b = ID(ID(2+3+4));
  642. )cpp");
  643. llvm::ArrayRef<syntax::Token> ID1 = findSpelled("ID ( 1 + 2 + 3 )");
  644. EXPECT_THAT(Buffer.expansionStartingAt(&ID1.front()),
  645. ValueIs(IsExpansion(SameRange(ID1),
  646. SameRange(findExpanded("1 + 2 + 3")))));
  647. // Only the first spelled token should be found.
  648. for (const auto &T : ID1.drop_front())
  649. EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
  650. llvm::ArrayRef<syntax::Token> ID2 = findSpelled("ID ( ID ( 2 + 3 + 4 ) )");
  651. EXPECT_THAT(Buffer.expansionStartingAt(&ID2.front()),
  652. ValueIs(IsExpansion(SameRange(ID2),
  653. SameRange(findExpanded("2 + 3 + 4")))));
  654. // Only the first spelled token should be found.
  655. for (const auto &T : ID2.drop_front())
  656. EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
  657. // PP directives.
  658. recordTokens(R"cpp(
  659. #define FOO 1
  660. int a = FOO;
  661. #pragma once
  662. int b = 1;
  663. )cpp");
  664. llvm::ArrayRef<syntax::Token> DefineFoo = findSpelled("# define FOO 1");
  665. EXPECT_THAT(
  666. Buffer.expansionStartingAt(&DefineFoo.front()),
  667. ValueIs(IsExpansion(SameRange(DefineFoo),
  668. SameRange(findExpanded("int a").take_front(0)))));
  669. // Only the first spelled token should be found.
  670. for (const auto &T : DefineFoo.drop_front())
  671. EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
  672. llvm::ArrayRef<syntax::Token> PragmaOnce = findSpelled("# pragma once");
  673. EXPECT_THAT(
  674. Buffer.expansionStartingAt(&PragmaOnce.front()),
  675. ValueIs(IsExpansion(SameRange(PragmaOnce),
  676. SameRange(findExpanded("int b").take_front(0)))));
  677. // Only the first spelled token should be found.
  678. for (const auto &T : PragmaOnce.drop_front())
  679. EXPECT_EQ(Buffer.expansionStartingAt(&T), llvm::None);
  680. }
  681. TEST_F(TokenBufferTest, TokensToFileRange) {
  682. addFile("./foo.h", "token_from_header");
  683. llvm::Annotations Code(R"cpp(
  684. #define FOO token_from_expansion
  685. #include "./foo.h"
  686. $all[[$i[[int]] a = FOO;]]
  687. )cpp");
  688. recordTokens(Code.code());
  689. auto &SM = *SourceMgr;
  690. // Two simple examples.
  691. auto Int = findExpanded("int").front();
  692. auto Semi = findExpanded(";").front();
  693. EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i").Begin,
  694. Code.range("i").End));
  695. EXPECT_EQ(syntax::Token::range(SM, Int, Semi),
  696. FileRange(SM.getMainFileID(), Code.range("all").Begin,
  697. Code.range("all").End));
  698. // We don't test assertion failures because death tests are slow.
  699. }
  700. TEST_F(TokenBufferTest, macroExpansions) {
  701. llvm::Annotations Code(R"cpp(
  702. #define FOO B
  703. #define FOO2 BA
  704. #define CALL(X) int X
  705. #define G CALL(FOO2)
  706. int B;
  707. $macro[[FOO]];
  708. $macro[[CALL]](A);
  709. $macro[[G]];
  710. )cpp");
  711. recordTokens(Code.code());
  712. auto &SM = *SourceMgr;
  713. auto Expansions = Buffer.macroExpansions(SM.getMainFileID());
  714. std::vector<FileRange> ExpectedMacroRanges;
  715. for (auto Range : Code.ranges("macro"))
  716. ExpectedMacroRanges.push_back(
  717. FileRange(SM.getMainFileID(), Range.Begin, Range.End));
  718. std::vector<FileRange> ActualMacroRanges;
  719. for (auto Expansion : Expansions)
  720. ActualMacroRanges.push_back(Expansion->range(SM));
  721. EXPECT_EQ(ExpectedMacroRanges, ActualMacroRanges);
  722. }
  723. } // namespace