LexerTest.cpp 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559
  1. //===- unittests/Lex/LexerTest.cpp ------ Lexer tests ---------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. #include "clang/Lex/Lexer.h"
  9. #include "clang/Basic/Diagnostic.h"
  10. #include "clang/Basic/DiagnosticOptions.h"
  11. #include "clang/Basic/FileManager.h"
  12. #include "clang/Basic/LangOptions.h"
  13. #include "clang/Basic/SourceLocation.h"
  14. #include "clang/Basic/SourceManager.h"
  15. #include "clang/Basic/TargetInfo.h"
  16. #include "clang/Basic/TargetOptions.h"
  17. #include "clang/Basic/TokenKinds.h"
  18. #include "clang/Lex/HeaderSearch.h"
  19. #include "clang/Lex/HeaderSearchOptions.h"
  20. #include "clang/Lex/MacroArgs.h"
  21. #include "clang/Lex/MacroInfo.h"
  22. #include "clang/Lex/ModuleLoader.h"
  23. #include "clang/Lex/Preprocessor.h"
  24. #include "clang/Lex/PreprocessorOptions.h"
  25. #include "gmock/gmock.h"
  26. #include "gtest/gtest.h"
  27. #include <vector>
  28. namespace {
  29. using namespace clang;
  30. using testing::ElementsAre;
  31. // The test fixture.
  32. class LexerTest : public ::testing::Test {
  33. protected:
  34. LexerTest()
  35. : FileMgr(FileMgrOpts),
  36. DiagID(new DiagnosticIDs()),
  37. Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
  38. SourceMgr(Diags, FileMgr),
  39. TargetOpts(new TargetOptions)
  40. {
  41. TargetOpts->Triple = "x86_64-apple-darwin11.1.0";
  42. Target = TargetInfo::CreateTargetInfo(Diags, TargetOpts);
  43. }
  44. std::unique_ptr<Preprocessor> CreatePP(StringRef Source,
  45. TrivialModuleLoader &ModLoader) {
  46. std::unique_ptr<llvm::MemoryBuffer> Buf =
  47. llvm::MemoryBuffer::getMemBuffer(Source);
  48. SourceMgr.setMainFileID(SourceMgr.createFileID(std::move(Buf)));
  49. HeaderSearch HeaderInfo(std::make_shared<HeaderSearchOptions>(), SourceMgr,
  50. Diags, LangOpts, Target.get());
  51. std::unique_ptr<Preprocessor> PP = std::make_unique<Preprocessor>(
  52. std::make_shared<PreprocessorOptions>(), Diags, LangOpts, SourceMgr,
  53. HeaderInfo, ModLoader,
  54. /*IILookup =*/nullptr,
  55. /*OwnsHeaderSearch =*/false);
  56. PP->Initialize(*Target);
  57. PP->EnterMainSourceFile();
  58. return PP;
  59. }
  60. std::vector<Token> Lex(StringRef Source) {
  61. TrivialModuleLoader ModLoader;
  62. auto PP = CreatePP(Source, ModLoader);
  63. std::vector<Token> toks;
  64. while (1) {
  65. Token tok;
  66. PP->Lex(tok);
  67. if (tok.is(tok::eof))
  68. break;
  69. toks.push_back(tok);
  70. }
  71. return toks;
  72. }
  73. std::vector<Token> CheckLex(StringRef Source,
  74. ArrayRef<tok::TokenKind> ExpectedTokens) {
  75. auto toks = Lex(Source);
  76. EXPECT_EQ(ExpectedTokens.size(), toks.size());
  77. for (unsigned i = 0, e = ExpectedTokens.size(); i != e; ++i) {
  78. EXPECT_EQ(ExpectedTokens[i], toks[i].getKind());
  79. }
  80. return toks;
  81. }
  82. std::string getSourceText(Token Begin, Token End) {
  83. bool Invalid;
  84. StringRef Str =
  85. Lexer::getSourceText(CharSourceRange::getTokenRange(SourceRange(
  86. Begin.getLocation(), End.getLocation())),
  87. SourceMgr, LangOpts, &Invalid);
  88. if (Invalid)
  89. return "<INVALID>";
  90. return Str;
  91. }
  92. FileSystemOptions FileMgrOpts;
  93. FileManager FileMgr;
  94. IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
  95. DiagnosticsEngine Diags;
  96. SourceManager SourceMgr;
  97. LangOptions LangOpts;
  98. std::shared_ptr<TargetOptions> TargetOpts;
  99. IntrusiveRefCntPtr<TargetInfo> Target;
  100. };
  101. TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) {
  102. std::vector<tok::TokenKind> ExpectedTokens;
  103. ExpectedTokens.push_back(tok::identifier);
  104. ExpectedTokens.push_back(tok::l_paren);
  105. ExpectedTokens.push_back(tok::identifier);
  106. ExpectedTokens.push_back(tok::r_paren);
  107. std::vector<Token> toks = CheckLex("#define M(x) x\n"
  108. "M(f(M(i)))",
  109. ExpectedTokens);
  110. EXPECT_EQ("M(i)", getSourceText(toks[2], toks[2]));
  111. }
  112. TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) {
  113. std::vector<tok::TokenKind> ExpectedTokens;
  114. ExpectedTokens.push_back(tok::identifier);
  115. ExpectedTokens.push_back(tok::identifier);
  116. std::vector<Token> toks = CheckLex("#define M(x) x\n"
  117. "M(M(i) c)",
  118. ExpectedTokens);
  119. EXPECT_EQ("M(i)", getSourceText(toks[0], toks[0]));
  120. }
  121. TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForBeginOfMacro) {
  122. std::vector<tok::TokenKind> ExpectedTokens;
  123. ExpectedTokens.push_back(tok::identifier);
  124. ExpectedTokens.push_back(tok::identifier);
  125. ExpectedTokens.push_back(tok::identifier);
  126. std::vector<Token> toks = CheckLex("#define M(x) x\n"
  127. "M(c c M(i))",
  128. ExpectedTokens);
  129. EXPECT_EQ("c M(i)", getSourceText(toks[1], toks[2]));
  130. }
  131. TEST_F(LexerTest, GetSourceTextExpandsInMacroArgumentForEndOfMacro) {
  132. std::vector<tok::TokenKind> ExpectedTokens;
  133. ExpectedTokens.push_back(tok::identifier);
  134. ExpectedTokens.push_back(tok::identifier);
  135. ExpectedTokens.push_back(tok::identifier);
  136. std::vector<Token> toks = CheckLex("#define M(x) x\n"
  137. "M(M(i) c c)",
  138. ExpectedTokens);
  139. EXPECT_EQ("M(i) c", getSourceText(toks[0], toks[1]));
  140. }
  141. TEST_F(LexerTest, GetSourceTextInSeparateFnMacros) {
  142. std::vector<tok::TokenKind> ExpectedTokens;
  143. ExpectedTokens.push_back(tok::identifier);
  144. ExpectedTokens.push_back(tok::identifier);
  145. ExpectedTokens.push_back(tok::identifier);
  146. ExpectedTokens.push_back(tok::identifier);
  147. std::vector<Token> toks = CheckLex("#define M(x) x\n"
  148. "M(c M(i)) M(M(i) c)",
  149. ExpectedTokens);
  150. EXPECT_EQ("<INVALID>", getSourceText(toks[1], toks[2]));
  151. }
  152. TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) {
  153. std::vector<tok::TokenKind> ExpectedTokens;
  154. ExpectedTokens.push_back(tok::identifier);
  155. ExpectedTokens.push_back(tok::l_paren);
  156. ExpectedTokens.push_back(tok::identifier);
  157. ExpectedTokens.push_back(tok::r_paren);
  158. std::vector<Token> toks = CheckLex("#define M(x) x\n"
  159. "#define C(x) M(x##c)\n"
  160. "M(f(C(i)))",
  161. ExpectedTokens);
  162. EXPECT_EQ("C(i)", getSourceText(toks[2], toks[2]));
  163. }
  164. TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) {
  165. std::vector<tok::TokenKind> ExpectedTokens;
  166. ExpectedTokens.push_back(tok::identifier);
  167. ExpectedTokens.push_back(tok::l_paren);
  168. ExpectedTokens.push_back(tok::identifier);
  169. ExpectedTokens.push_back(tok::r_paren);
  170. std::vector<Token> toks = CheckLex("#define M(x) x\n"
  171. "f(M(M(i)))",
  172. ExpectedTokens);
  173. EXPECT_EQ("M(M(i))", getSourceText(toks[2], toks[2]));
  174. }
  175. TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) {
  176. std::vector<tok::TokenKind> ExpectedTokens;
  177. ExpectedTokens.push_back(tok::identifier);
  178. ExpectedTokens.push_back(tok::l_paren);
  179. ExpectedTokens.push_back(tok::identifier);
  180. ExpectedTokens.push_back(tok::r_paren);
  181. std::vector<Token> toks = CheckLex("#define M(x) x\n"
  182. "M(f(i))",
  183. ExpectedTokens);
  184. EXPECT_EQ("i", getSourceText(toks[2], toks[2]));
  185. }
  186. TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) {
  187. std::vector<tok::TokenKind> ExpectedTokens;
  188. ExpectedTokens.push_back(tok::identifier);
  189. ExpectedTokens.push_back(tok::l_paren);
  190. ExpectedTokens.push_back(tok::identifier);
  191. ExpectedTokens.push_back(tok::r_paren);
  192. std::vector<Token> toks = CheckLex("#define M(x) x\n"
  193. "#define C(x) x\n"
  194. "f(C(M(i)))",
  195. ExpectedTokens);
  196. EXPECT_EQ("C(M(i))", getSourceText(toks[2], toks[2]));
  197. }
  198. TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) {
  199. std::vector<tok::TokenKind> ExpectedTokens;
  200. ExpectedTokens.push_back(tok::identifier);
  201. ExpectedTokens.push_back(tok::l_paren);
  202. ExpectedTokens.push_back(tok::identifier);
  203. ExpectedTokens.push_back(tok::identifier);
  204. ExpectedTokens.push_back(tok::r_paren);
  205. std::vector<Token> toks = CheckLex("#define M(x) x\n"
  206. "#define C(x) c x\n"
  207. "f(C(M(i)))",
  208. ExpectedTokens);
  209. EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
  210. }
  211. TEST_F(LexerTest, GetSourceTextExpandsRecursively) {
  212. std::vector<tok::TokenKind> ExpectedTokens;
  213. ExpectedTokens.push_back(tok::identifier);
  214. ExpectedTokens.push_back(tok::identifier);
  215. ExpectedTokens.push_back(tok::l_paren);
  216. ExpectedTokens.push_back(tok::identifier);
  217. ExpectedTokens.push_back(tok::r_paren);
  218. std::vector<Token> toks = CheckLex("#define M(x) x\n"
  219. "#define C(x) c M(x)\n"
  220. "C(f(M(i)))",
  221. ExpectedTokens);
  222. EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
  223. }
  224. TEST_F(LexerTest, LexAPI) {
  225. std::vector<tok::TokenKind> ExpectedTokens;
  226. ExpectedTokens.push_back(tok::l_square);
  227. ExpectedTokens.push_back(tok::identifier);
  228. ExpectedTokens.push_back(tok::r_square);
  229. ExpectedTokens.push_back(tok::l_square);
  230. ExpectedTokens.push_back(tok::identifier);
  231. ExpectedTokens.push_back(tok::r_square);
  232. ExpectedTokens.push_back(tok::identifier);
  233. ExpectedTokens.push_back(tok::identifier);
  234. ExpectedTokens.push_back(tok::identifier);
  235. ExpectedTokens.push_back(tok::identifier);
  236. std::vector<Token> toks = CheckLex("#define M(x) [x]\n"
  237. "#define N(x) x\n"
  238. "#define INN(x) x\n"
  239. "#define NOF1 INN(val)\n"
  240. "#define NOF2 val\n"
  241. "M(foo) N([bar])\n"
  242. "N(INN(val)) N(NOF1) N(NOF2) N(val)",
  243. ExpectedTokens);
  244. SourceLocation lsqrLoc = toks[0].getLocation();
  245. SourceLocation idLoc = toks[1].getLocation();
  246. SourceLocation rsqrLoc = toks[2].getLocation();
  247. CharSourceRange macroRange = SourceMgr.getExpansionRange(lsqrLoc);
  248. SourceLocation Loc;
  249. EXPECT_TRUE(Lexer::isAtStartOfMacroExpansion(lsqrLoc, SourceMgr, LangOpts, &Loc));
  250. EXPECT_EQ(Loc, macroRange.getBegin());
  251. EXPECT_FALSE(Lexer::isAtStartOfMacroExpansion(idLoc, SourceMgr, LangOpts));
  252. EXPECT_FALSE(Lexer::isAtEndOfMacroExpansion(idLoc, SourceMgr, LangOpts));
  253. EXPECT_TRUE(Lexer::isAtEndOfMacroExpansion(rsqrLoc, SourceMgr, LangOpts, &Loc));
  254. EXPECT_EQ(Loc, macroRange.getEnd());
  255. EXPECT_TRUE(macroRange.isTokenRange());
  256. CharSourceRange range = Lexer::makeFileCharRange(
  257. CharSourceRange::getTokenRange(lsqrLoc, idLoc), SourceMgr, LangOpts);
  258. EXPECT_TRUE(range.isInvalid());
  259. range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(idLoc, rsqrLoc),
  260. SourceMgr, LangOpts);
  261. EXPECT_TRUE(range.isInvalid());
  262. range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc),
  263. SourceMgr, LangOpts);
  264. EXPECT_TRUE(!range.isTokenRange());
  265. EXPECT_EQ(range.getAsRange(),
  266. SourceRange(macroRange.getBegin(),
  267. macroRange.getEnd().getLocWithOffset(1)));
  268. StringRef text = Lexer::getSourceText(
  269. CharSourceRange::getTokenRange(lsqrLoc, rsqrLoc),
  270. SourceMgr, LangOpts);
  271. EXPECT_EQ(text, "M(foo)");
  272. SourceLocation macroLsqrLoc = toks[3].getLocation();
  273. SourceLocation macroIdLoc = toks[4].getLocation();
  274. SourceLocation macroRsqrLoc = toks[5].getLocation();
  275. SourceLocation fileLsqrLoc = SourceMgr.getSpellingLoc(macroLsqrLoc);
  276. SourceLocation fileIdLoc = SourceMgr.getSpellingLoc(macroIdLoc);
  277. SourceLocation fileRsqrLoc = SourceMgr.getSpellingLoc(macroRsqrLoc);
  278. range = Lexer::makeFileCharRange(
  279. CharSourceRange::getTokenRange(macroLsqrLoc, macroIdLoc),
  280. SourceMgr, LangOpts);
  281. EXPECT_EQ(SourceRange(fileLsqrLoc, fileIdLoc.getLocWithOffset(3)),
  282. range.getAsRange());
  283. range = Lexer::makeFileCharRange(CharSourceRange::getTokenRange(macroIdLoc, macroRsqrLoc),
  284. SourceMgr, LangOpts);
  285. EXPECT_EQ(SourceRange(fileIdLoc, fileRsqrLoc.getLocWithOffset(1)),
  286. range.getAsRange());
  287. macroRange = SourceMgr.getExpansionRange(macroLsqrLoc);
  288. range = Lexer::makeFileCharRange(
  289. CharSourceRange::getTokenRange(macroLsqrLoc, macroRsqrLoc),
  290. SourceMgr, LangOpts);
  291. EXPECT_EQ(SourceRange(macroRange.getBegin(), macroRange.getEnd().getLocWithOffset(1)),
  292. range.getAsRange());
  293. text = Lexer::getSourceText(
  294. CharSourceRange::getTokenRange(SourceRange(macroLsqrLoc, macroIdLoc)),
  295. SourceMgr, LangOpts);
  296. EXPECT_EQ(text, "[bar");
  297. SourceLocation idLoc1 = toks[6].getLocation();
  298. SourceLocation idLoc2 = toks[7].getLocation();
  299. SourceLocation idLoc3 = toks[8].getLocation();
  300. SourceLocation idLoc4 = toks[9].getLocation();
  301. EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc1, SourceMgr, LangOpts));
  302. EXPECT_EQ("INN", Lexer::getImmediateMacroName(idLoc2, SourceMgr, LangOpts));
  303. EXPECT_EQ("NOF2", Lexer::getImmediateMacroName(idLoc3, SourceMgr, LangOpts));
  304. EXPECT_EQ("N", Lexer::getImmediateMacroName(idLoc4, SourceMgr, LangOpts));
  305. }
  306. TEST_F(LexerTest, DontMergeMacroArgsFromDifferentMacroFiles) {
  307. std::vector<Token> toks =
  308. Lex("#define helper1 0\n"
  309. "void helper2(const char *, ...);\n"
  310. "#define M1(a, ...) helper2(a, ##__VA_ARGS__)\n"
  311. "#define M2(a, ...) M1(a, helper1, ##__VA_ARGS__)\n"
  312. "void f1() { M2(\"a\", \"b\"); }");
  313. // Check the file corresponding to the "helper1" macro arg in M2.
  314. //
  315. // The lexer used to report its size as 31, meaning that the end of the
  316. // expansion would be on the *next line* (just past `M2("a", "b")`). Make
  317. // sure that we get the correct end location (the comma after "helper1").
  318. SourceLocation helper1ArgLoc = toks[20].getLocation();
  319. EXPECT_EQ(SourceMgr.getFileIDSize(SourceMgr.getFileID(helper1ArgLoc)), 8U);
  320. }
  321. TEST_F(LexerTest, DontOverallocateStringifyArgs) {
  322. TrivialModuleLoader ModLoader;
  323. auto PP = CreatePP("\"StrArg\", 5, 'C'", ModLoader);
  324. llvm::BumpPtrAllocator Allocator;
  325. std::array<IdentifierInfo *, 3> ParamList;
  326. MacroInfo *MI = PP->AllocateMacroInfo({});
  327. MI->setIsFunctionLike();
  328. MI->setParameterList(ParamList, Allocator);
  329. EXPECT_EQ(3u, MI->getNumParams());
  330. EXPECT_TRUE(MI->isFunctionLike());
  331. Token Eof;
  332. Eof.setKind(tok::eof);
  333. std::vector<Token> ArgTokens;
  334. while (1) {
  335. Token tok;
  336. PP->Lex(tok);
  337. if (tok.is(tok::eof)) {
  338. ArgTokens.push_back(Eof);
  339. break;
  340. }
  341. if (tok.is(tok::comma))
  342. ArgTokens.push_back(Eof);
  343. else
  344. ArgTokens.push_back(tok);
  345. }
  346. auto MacroArgsDeleter = [&PP](MacroArgs *M) { M->destroy(*PP); };
  347. std::unique_ptr<MacroArgs, decltype(MacroArgsDeleter)> MA(
  348. MacroArgs::create(MI, ArgTokens, false, *PP), MacroArgsDeleter);
  349. auto StringifyArg = [&](int ArgNo) {
  350. return MA->StringifyArgument(MA->getUnexpArgument(ArgNo), *PP,
  351. /*Charify=*/false, {}, {});
  352. };
  353. Token Result = StringifyArg(0);
  354. EXPECT_EQ(tok::string_literal, Result.getKind());
  355. EXPECT_STREQ("\"\\\"StrArg\\\"\"", Result.getLiteralData());
  356. Result = StringifyArg(1);
  357. EXPECT_EQ(tok::string_literal, Result.getKind());
  358. EXPECT_STREQ("\"5\"", Result.getLiteralData());
  359. Result = StringifyArg(2);
  360. EXPECT_EQ(tok::string_literal, Result.getKind());
  361. EXPECT_STREQ("\"'C'\"", Result.getLiteralData());
  362. #if !defined(NDEBUG) && GTEST_HAS_DEATH_TEST
  363. EXPECT_DEATH(StringifyArg(3), "Invalid arg #");
  364. #endif
  365. }
  366. TEST_F(LexerTest, IsNewLineEscapedValid) {
  367. auto hasNewLineEscaped = [](const char *S) {
  368. return Lexer::isNewLineEscaped(S, S + strlen(S) - 1);
  369. };
  370. EXPECT_TRUE(hasNewLineEscaped("\\\r"));
  371. EXPECT_TRUE(hasNewLineEscaped("\\\n"));
  372. EXPECT_TRUE(hasNewLineEscaped("\\\r\n"));
  373. EXPECT_TRUE(hasNewLineEscaped("\\\n\r"));
  374. EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r"));
  375. EXPECT_TRUE(hasNewLineEscaped("\\ \t\v\f\r\n"));
  376. EXPECT_FALSE(hasNewLineEscaped("\\\r\r"));
  377. EXPECT_FALSE(hasNewLineEscaped("\\\r\r\n"));
  378. EXPECT_FALSE(hasNewLineEscaped("\\\n\n"));
  379. EXPECT_FALSE(hasNewLineEscaped("\r"));
  380. EXPECT_FALSE(hasNewLineEscaped("\n"));
  381. EXPECT_FALSE(hasNewLineEscaped("\r\n"));
  382. EXPECT_FALSE(hasNewLineEscaped("\n\r"));
  383. EXPECT_FALSE(hasNewLineEscaped("\r\r"));
  384. EXPECT_FALSE(hasNewLineEscaped("\n\n"));
  385. }
  386. TEST_F(LexerTest, GetBeginningOfTokenWithEscapedNewLine) {
  387. // Each line should have the same length for
  388. // further offset calculation to be more straightforward.
  389. const unsigned IdentifierLength = 8;
  390. std::string TextToLex = "rabarbar\n"
  391. "foo\\\nbar\n"
  392. "foo\\\rbar\n"
  393. "fo\\\r\nbar\n"
  394. "foo\\\n\rba\n";
  395. std::vector<tok::TokenKind> ExpectedTokens{5, tok::identifier};
  396. std::vector<Token> LexedTokens = CheckLex(TextToLex, ExpectedTokens);
  397. for (const Token &Tok : LexedTokens) {
  398. std::pair<FileID, unsigned> OriginalLocation =
  399. SourceMgr.getDecomposedLoc(Tok.getLocation());
  400. for (unsigned Offset = 0; Offset < IdentifierLength; ++Offset) {
  401. SourceLocation LookupLocation =
  402. Tok.getLocation().getLocWithOffset(Offset);
  403. std::pair<FileID, unsigned> FoundLocation =
  404. SourceMgr.getDecomposedExpansionLoc(
  405. Lexer::GetBeginningOfToken(LookupLocation, SourceMgr, LangOpts));
  406. // Check that location returned by the GetBeginningOfToken
  407. // is the same as original token location reported by Lexer.
  408. EXPECT_EQ(FoundLocation.second, OriginalLocation.second);
  409. }
  410. }
  411. }
  412. TEST_F(LexerTest, AvoidPastEndOfStringDereference) {
  413. EXPECT_TRUE(Lex(" // \\\n").empty());
  414. EXPECT_TRUE(Lex("#include <\\\\").empty());
  415. EXPECT_TRUE(Lex("#include <\\\\\n").empty());
  416. }
  417. TEST_F(LexerTest, StringizingRasString) {
  418. // For "std::string Lexer::Stringify(StringRef Str, bool Charify)".
  419. std::string String1 = R"(foo
  420. {"bar":[]}
  421. baz)";
  422. // For "void Lexer::Stringify(SmallVectorImpl<char> &Str)".
  423. SmallString<128> String2;
  424. String2 += String1.c_str();
  425. // Corner cases.
  426. std::string String3 = R"(\
  427. \n
  428. \\n
  429. \\)";
  430. SmallString<128> String4;
  431. String4 += String3.c_str();
  432. std::string String5 = R"(a\
  433. \\b)";
  434. SmallString<128> String6;
  435. String6 += String5.c_str();
  436. String1 = Lexer::Stringify(StringRef(String1));
  437. Lexer::Stringify(String2);
  438. String3 = Lexer::Stringify(StringRef(String3));
  439. Lexer::Stringify(String4);
  440. String5 = Lexer::Stringify(StringRef(String5));
  441. Lexer::Stringify(String6);
  442. EXPECT_EQ(String1, R"(foo\n {\"bar\":[]}\n baz)");
  443. EXPECT_EQ(String2, R"(foo\n {\"bar\":[]}\n baz)");
  444. EXPECT_EQ(String3, R"(\\\n \\n\n \\\\n\n \\\\)");
  445. EXPECT_EQ(String4, R"(\\\n \\n\n \\\\n\n \\\\)");
  446. EXPECT_EQ(String5, R"(a\\\n\n\n \\\\b)");
  447. EXPECT_EQ(String6, R"(a\\\n\n\n \\\\b)");
  448. }
  449. TEST_F(LexerTest, CharRangeOffByOne) {
  450. std::vector<Token> toks = Lex(R"(#define MOO 1
  451. void foo() { MOO; })");
  452. const Token &moo = toks[5];
  453. EXPECT_EQ(getSourceText(moo, moo), "MOO");
  454. SourceRange R{moo.getLocation(), moo.getLocation()};
  455. EXPECT_TRUE(
  456. Lexer::isAtStartOfMacroExpansion(R.getBegin(), SourceMgr, LangOpts));
  457. EXPECT_TRUE(
  458. Lexer::isAtEndOfMacroExpansion(R.getEnd(), SourceMgr, LangOpts));
  459. CharSourceRange CR = Lexer::getAsCharRange(R, SourceMgr, LangOpts);
  460. EXPECT_EQ(Lexer::getSourceText(CR, SourceMgr, LangOpts), "MOO"); // Was "MO".
  461. }
  462. TEST_F(LexerTest, FindNextToken) {
  463. Lex("int abcd = 0;\n"
  464. "int xyz = abcd;\n");
  465. std::vector<std::string> GeneratedByNextToken;
  466. SourceLocation Loc =
  467. SourceMgr.getLocForStartOfFile(SourceMgr.getMainFileID());
  468. while (true) {
  469. auto T = Lexer::findNextToken(Loc, SourceMgr, LangOpts);
  470. ASSERT_TRUE(T.hasValue());
  471. if (T->is(tok::eof))
  472. break;
  473. GeneratedByNextToken.push_back(getSourceText(*T, *T));
  474. Loc = T->getLocation();
  475. }
  476. EXPECT_THAT(GeneratedByNextToken, ElementsAre("abcd", "=", "0", ";", "int",
  477. "xyz", "=", "abcd", ";"));
  478. }
  479. } // anonymous namespace