//===- DependencyDirectivesSourceMinimizer.cpp - -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This is the implementation for minimizing header and source files to the
/// minimum necessary preprocessor directives for evaluating includes. It
/// reduces the source down to #define, #include, #import, @import, and any
/// conditional preprocessor logic that contains one of those.
///
//===----------------------------------------------------------------------===//
  16. #include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
  17. #include "clang/Basic/CharInfo.h"
  18. #include "clang/Basic/Diagnostic.h"
  19. #include "clang/Lex/LexDiagnostic.h"
  20. #include "llvm/ADT/StringSwitch.h"
  21. #include "llvm/Support/MemoryBuffer.h"
  22. using namespace llvm;
  23. using namespace clang;
  24. using namespace clang::minimize_source_to_dependency_directives;
  25. namespace {
  26. struct Minimizer {
  27. /// Minimized output.
  28. SmallVectorImpl<char> &Out;
  29. /// The known tokens encountered during the minimization.
  30. SmallVectorImpl<Token> &Tokens;
  31. Minimizer(SmallVectorImpl<char> &Out, SmallVectorImpl<Token> &Tokens,
  32. StringRef Input, DiagnosticsEngine *Diags,
  33. SourceLocation InputSourceLoc)
  34. : Out(Out), Tokens(Tokens), Input(Input), Diags(Diags),
  35. InputSourceLoc(InputSourceLoc) {}
  36. /// Lex the provided source and emit the minimized output.
  37. ///
  38. /// \returns True on error.
  39. bool minimize();
  40. private:
  41. struct IdInfo {
  42. const char *Last;
  43. StringRef Name;
  44. };
  45. /// Lex an identifier.
  46. ///
  47. /// \pre First points at a valid identifier head.
  48. LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End);
  49. LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First,
  50. const char *const End);
  51. LLVM_NODISCARD bool minimizeImpl(const char *First, const char *const End);
  52. LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End);
  53. LLVM_NODISCARD bool lexAt(const char *&First, const char *const End);
  54. LLVM_NODISCARD bool lexModule(const char *&First, const char *const End);
  55. LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End);
  56. LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End);
  57. LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End);
  58. LLVM_NODISCARD bool lexDefault(TokenKind Kind, StringRef Directive,
  59. const char *&First, const char *const End);
  60. Token &makeToken(TokenKind K) {
  61. Tokens.emplace_back(K, Out.size());
  62. return Tokens.back();
  63. }
  64. void popToken() {
  65. Out.resize(Tokens.back().Offset);
  66. Tokens.pop_back();
  67. }
  68. TokenKind top() const { return Tokens.empty() ? pp_none : Tokens.back().K; }
  69. Minimizer &put(char Byte) {
  70. Out.push_back(Byte);
  71. return *this;
  72. }
  73. Minimizer &append(StringRef S) { return append(S.begin(), S.end()); }
  74. Minimizer &append(const char *First, const char *Last) {
  75. Out.append(First, Last);
  76. return *this;
  77. }
  78. void printToNewline(const char *&First, const char *const End);
  79. void printAdjacentModuleNameParts(const char *&First, const char *const End);
  80. LLVM_NODISCARD bool printAtImportBody(const char *&First,
  81. const char *const End);
  82. void printDirectiveBody(const char *&First, const char *const End);
  83. void printAdjacentMacroArgs(const char *&First, const char *const End);
  84. LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End);
  85. /// Reports a diagnostic if the diagnostic engine is provided. Always returns
  86. /// true at the end.
  87. bool reportError(const char *CurPtr, unsigned Err);
  88. StringMap<char> SplitIds;
  89. StringRef Input;
  90. DiagnosticsEngine *Diags;
  91. SourceLocation InputSourceLoc;
  92. };
  93. } // end anonymous namespace
  94. bool Minimizer::reportError(const char *CurPtr, unsigned Err) {
  95. if (!Diags)
  96. return true;
  97. assert(CurPtr >= Input.data() && "invalid buffer ptr");
  98. Diags->Report(InputSourceLoc.getLocWithOffset(CurPtr - Input.data()), Err);
  99. return true;
  100. }
  101. static void skipOverSpaces(const char *&First, const char *const End) {
  102. while (First != End && isHorizontalWhitespace(*First))
  103. ++First;
  104. }
  105. LLVM_NODISCARD static bool isRawStringLiteral(const char *First,
  106. const char *Current) {
  107. assert(First <= Current);
  108. // Check if we can even back up.
  109. if (*Current != '"' || First == Current)
  110. return false;
  111. // Check for an "R".
  112. --Current;
  113. if (*Current != 'R')
  114. return false;
  115. if (First == Current || !isIdentifierBody(*--Current))
  116. return true;
  117. // Check for a prefix of "u", "U", or "L".
  118. if (*Current == 'u' || *Current == 'U' || *Current == 'L')
  119. return First == Current || !isIdentifierBody(*--Current);
  120. // Check for a prefix of "u8".
  121. if (*Current != '8' || First == Current || *Current-- != 'u')
  122. return false;
  123. return First == Current || !isIdentifierBody(*--Current);
  124. }
  125. static void skipRawString(const char *&First, const char *const End) {
  126. assert(First[0] == '"');
  127. assert(First[-1] == 'R');
  128. const char *Last = ++First;
  129. while (Last != End && *Last != '(')
  130. ++Last;
  131. if (Last == End) {
  132. First = Last; // Hit the end... just give up.
  133. return;
  134. }
  135. StringRef Terminator(First, Last - First);
  136. for (;;) {
  137. // Move First to just past the next ")".
  138. First = Last;
  139. while (First != End && *First != ')')
  140. ++First;
  141. if (First == End)
  142. return;
  143. ++First;
  144. // Look ahead for the terminator sequence.
  145. Last = First;
  146. while (Last != End && size_t(Last - First) < Terminator.size() &&
  147. Terminator[Last - First] == *Last)
  148. ++Last;
  149. // Check if we hit it (or the end of the file).
  150. if (Last == End) {
  151. First = Last;
  152. return;
  153. }
  154. if (size_t(Last - First) < Terminator.size())
  155. continue;
  156. if (*Last != '"')
  157. continue;
  158. First = Last + 1;
  159. return;
  160. }
  161. }
  /// Skips a quoted sequence that starts at \p First: a character literal, a
  /// string literal, or an angle-bracketed header name.
  ///
  /// A backslash always protects the character that follows it. On return
  /// \p First is just past the closing delimiter, or at \p End if the sequence
  /// is not terminated.
  static void skipString(const char *&First, const char *const End) {
    assert(*First == '\'' || *First == '"' || *First == '<');
    const char Closer = (*First == '<') ? '>' : *First;
    ++First;
    while (First != End && *First != Closer) {
      // Step over the escaped character as well; bail out if there is none.
      if (*First == '\\' && ++First == End)
        return;
      ++First;
    }
    if (First != End)
      ++First; // Finish off the string.
  }
  172. static void skipNewline(const char *&First, const char *End) {
  173. assert(isVerticalWhitespace(*First));
  174. ++First;
  175. if (First == End)
  176. return;
  177. // Check for "\n\r" and "\r\n".
  178. if (LLVM_UNLIKELY(isVerticalWhitespace(*First) && First[-1] != First[0]))
  179. ++First;
  180. }
  181. static void skipToNewlineRaw(const char *&First, const char *const End) {
  182. for (;;) {
  183. if (First == End)
  184. return;
  185. if (isVerticalWhitespace(*First))
  186. return;
  187. while (!isVerticalWhitespace(*First))
  188. if (++First == End)
  189. return;
  190. if (First[-1] != '\\')
  191. return;
  192. ++First; // Keep going...
  193. }
  194. }
  195. static const char *reverseOverSpaces(const char *First, const char *Last) {
  196. assert(First <= Last);
  197. while (First != Last && isHorizontalWhitespace(Last[-1]))
  198. --Last;
  199. return Last;
  200. }
  201. static void skipLineComment(const char *&First, const char *const End) {
  202. assert(First[0] == '/' && First[1] == '/');
  203. First += 2;
  204. skipToNewlineRaw(First, End);
  205. }
  /// Skips a "/* ... */" comment starting at \p First.
  ///
  /// On return \p First is just past the closing "*/", or at \p End if the
  /// comment is not terminated.
  ///
  /// \pre First points at "/*".
  static void skipBlockComment(const char *&First, const char *const End) {
    assert(First[0] == '/' && First[1] == '*');

    // The shortest complete comment, "/**/", needs four characters.
    if (End - First < 4) {
      First = End;
      return;
    }

    // Look for a '/' preceded by '*'. Starting at offset 3 makes sure the '*'
    // of the opening "/*" is not reused as part of the terminator.
    const char *Cursor = First + 3;
    while (Cursor != End && !(Cursor[-1] == '*' && Cursor[0] == '/'))
      ++Cursor;
    First = (Cursor == End) ? End : Cursor + 1;
  }
  218. /// \returns True if the current single quotation mark character is a C++ 14
  219. /// digit separator.
  220. static bool isQuoteCppDigitSeparator(const char *const Start,
  221. const char *const Cur,
  222. const char *const End) {
  223. assert(*Cur == '\'' && "expected quotation character");
  224. // skipLine called in places where we don't expect a valid number
  225. // body before `start` on the same line, so always return false at the start.
  226. if (Start == Cur)
  227. return false;
  228. // The previous character must be a valid PP number character.
  229. // Make sure that the L, u, U, u8 prefixes don't get marked as a
  230. // separator though.
  231. char Prev = *(Cur - 1);
  232. if (Prev == 'L' || Prev == 'U' || Prev == 'u')
  233. return false;
  234. if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u')
  235. return false;
  236. if (!isPreprocessingNumberBody(Prev))
  237. return false;
  238. // The next character should be a valid identifier body character.
  239. return (Cur + 1) < End && isIdentifierBody(*(Cur + 1));
  240. }
  241. static void skipLine(const char *&First, const char *const End) {
  242. do {
  243. assert(First <= End);
  244. if (First == End)
  245. return;
  246. if (isVerticalWhitespace(*First)) {
  247. skipNewline(First, End);
  248. return;
  249. }
  250. const char *Start = First;
  251. while (First != End && !isVerticalWhitespace(*First)) {
  252. // Iterate over strings correctly to avoid comments and newlines.
  253. if (*First == '"' ||
  254. (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) {
  255. if (isRawStringLiteral(Start, First))
  256. skipRawString(First, End);
  257. else
  258. skipString(First, End);
  259. continue;
  260. }
  261. // Iterate over comments correctly.
  262. if (*First != '/' || End - First < 2) {
  263. ++First;
  264. continue;
  265. }
  266. if (First[1] == '/') {
  267. // "//...".
  268. skipLineComment(First, End);
  269. continue;
  270. }
  271. if (First[1] != '*') {
  272. ++First;
  273. continue;
  274. }
  275. // "/*...*/".
  276. skipBlockComment(First, End);
  277. }
  278. if (First == End)
  279. return;
  280. // Skip over the newline.
  281. assert(isVerticalWhitespace(*First));
  282. skipNewline(First, End);
  283. } while (First[-2] == '\\'); // Continue past line-continuations.
  284. }
  285. static void skipDirective(StringRef Name, const char *&First,
  286. const char *const End) {
  287. if (llvm::StringSwitch<bool>(Name)
  288. .Case("warning", true)
  289. .Case("error", true)
  290. .Default(false))
  291. // Do not process quotes or comments.
  292. skipToNewlineRaw(First, End);
  293. else
  294. skipLine(First, End);
  295. }
  296. void Minimizer::printToNewline(const char *&First, const char *const End) {
  297. while (First != End && !isVerticalWhitespace(*First)) {
  298. const char *Last = First;
  299. do {
  300. // Iterate over strings correctly to avoid comments and newlines.
  301. if (*Last == '"' || *Last == '\'' ||
  302. (*Last == '<' && top() == pp_include)) {
  303. if (LLVM_UNLIKELY(isRawStringLiteral(First, Last)))
  304. skipRawString(Last, End);
  305. else
  306. skipString(Last, End);
  307. continue;
  308. }
  309. if (*Last != '/' || End - Last < 2) {
  310. ++Last;
  311. continue; // Gather the rest up to print verbatim.
  312. }
  313. if (Last[1] != '/' && Last[1] != '*') {
  314. ++Last;
  315. continue;
  316. }
  317. // Deal with "//..." and "/*...*/".
  318. append(First, reverseOverSpaces(First, Last));
  319. First = Last;
  320. if (Last[1] == '/') {
  321. skipLineComment(First, End);
  322. return;
  323. }
  324. put(' ');
  325. skipBlockComment(First, End);
  326. skipOverSpaces(First, End);
  327. Last = First;
  328. } while (Last != End && !isVerticalWhitespace(*Last));
  329. // Print out the string.
  330. if (Last == End || Last == First || Last[-1] != '\\') {
  331. append(First, reverseOverSpaces(First, Last));
  332. return;
  333. }
  334. // Print up to the backslash, backing up over spaces.
  335. append(First, reverseOverSpaces(First, Last - 1));
  336. First = Last;
  337. skipNewline(First, End);
  338. skipOverSpaces(First, End);
  339. }
  340. }
  341. static void skipWhitespace(const char *&First, const char *const End) {
  342. for (;;) {
  343. assert(First <= End);
  344. skipOverSpaces(First, End);
  345. if (End - First < 2)
  346. return;
  347. if (First[0] == '\\' && isVerticalWhitespace(First[1])) {
  348. skipNewline(++First, End);
  349. continue;
  350. }
  351. // Check for a non-comment character.
  352. if (First[0] != '/')
  353. return;
  354. // "// ...".
  355. if (First[1] == '/') {
  356. skipLineComment(First, End);
  357. return;
  358. }
  359. // Cannot be a comment.
  360. if (First[1] != '*')
  361. return;
  362. // "/*...*/".
  363. skipBlockComment(First, End);
  364. }
  365. }
  366. void Minimizer::printAdjacentModuleNameParts(const char *&First,
  367. const char *const End) {
  368. // Skip over parts of the body.
  369. const char *Last = First;
  370. do
  371. ++Last;
  372. while (Last != End && (isIdentifierBody(*Last) || *Last == '.'));
  373. append(First, Last);
  374. First = Last;
  375. }
  376. bool Minimizer::printAtImportBody(const char *&First, const char *const End) {
  377. for (;;) {
  378. skipWhitespace(First, End);
  379. if (First == End)
  380. return true;
  381. if (isVerticalWhitespace(*First)) {
  382. skipNewline(First, End);
  383. continue;
  384. }
  385. // Found a semicolon.
  386. if (*First == ';') {
  387. put(*First++).put('\n');
  388. return false;
  389. }
  390. // Don't handle macro expansions inside @import for now.
  391. if (!isIdentifierBody(*First) && *First != '.')
  392. return true;
  393. printAdjacentModuleNameParts(First, End);
  394. }
  395. }
  396. void Minimizer::printDirectiveBody(const char *&First, const char *const End) {
  397. skipWhitespace(First, End); // Skip initial whitespace.
  398. printToNewline(First, End);
  399. while (Out.back() == ' ')
  400. Out.pop_back();
  401. put('\n');
  402. }
  403. LLVM_NODISCARD static const char *lexRawIdentifier(const char *First,
  404. const char *const End) {
  405. assert(isIdentifierBody(*First) && "invalid identifer");
  406. const char *Last = First + 1;
  407. while (Last != End && isIdentifierBody(*Last))
  408. ++Last;
  409. return Last;
  410. }
  411. LLVM_NODISCARD static const char *
  412. getIdentifierContinuation(const char *First, const char *const End) {
  413. if (End - First < 3 || First[0] != '\\' || !isVerticalWhitespace(First[1]))
  414. return nullptr;
  415. ++First;
  416. skipNewline(First, End);
  417. if (First == End)
  418. return nullptr;
  419. return isIdentifierBody(First[0]) ? First : nullptr;
  420. }
  421. Minimizer::IdInfo Minimizer::lexIdentifier(const char *First,
  422. const char *const End) {
  423. const char *Last = lexRawIdentifier(First, End);
  424. const char *Next = getIdentifierContinuation(Last, End);
  425. if (LLVM_LIKELY(!Next))
  426. return IdInfo{Last, StringRef(First, Last - First)};
  427. // Slow path, where identifiers are split over lines.
  428. SmallVector<char, 64> Id(First, Last);
  429. while (Next) {
  430. Last = lexRawIdentifier(Next, End);
  431. Id.append(Next, Last);
  432. Next = getIdentifierContinuation(Last, End);
  433. }
  434. return IdInfo{
  435. Last,
  436. SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()};
  437. }
  438. void Minimizer::printAdjacentMacroArgs(const char *&First,
  439. const char *const End) {
  440. // Skip over parts of the body.
  441. const char *Last = First;
  442. do
  443. ++Last;
  444. while (Last != End &&
  445. (isIdentifierBody(*Last) || *Last == '.' || *Last == ','));
  446. append(First, Last);
  447. First = Last;
  448. }
  449. bool Minimizer::printMacroArgs(const char *&First, const char *const End) {
  450. assert(*First == '(');
  451. put(*First++);
  452. for (;;) {
  453. skipWhitespace(First, End);
  454. if (First == End)
  455. return true;
  456. if (*First == ')') {
  457. put(*First++);
  458. return false;
  459. }
  460. // This is intentionally fairly liberal.
  461. if (!(isIdentifierBody(*First) || *First == '.' || *First == ','))
  462. return true;
  463. printAdjacentMacroArgs(First, End);
  464. }
  465. }
  466. /// Looks for an identifier starting from Last.
  467. ///
  468. /// Updates "First" to just past the next identifier, if any. Returns true iff
  469. /// the identifier matches "Id".
  470. bool Minimizer::isNextIdentifier(StringRef Id, const char *&First,
  471. const char *const End) {
  472. skipWhitespace(First, End);
  473. if (First == End || !isIdentifierHead(*First))
  474. return false;
  475. IdInfo FoundId = lexIdentifier(First, End);
  476. First = FoundId.Last;
  477. return FoundId.Name == Id;
  478. }
  479. bool Minimizer::lexAt(const char *&First, const char *const End) {
  480. // Handle "@import".
  481. const char *ImportLoc = First++;
  482. if (!isNextIdentifier("import", First, End)) {
  483. skipLine(First, End);
  484. return false;
  485. }
  486. makeToken(decl_at_import);
  487. append("@import ");
  488. if (printAtImportBody(First, End))
  489. return reportError(
  490. ImportLoc, diag::err_dep_source_minimizer_missing_sema_after_at_import);
  491. skipWhitespace(First, End);
  492. if (First == End)
  493. return false;
  494. if (!isVerticalWhitespace(*First))
  495. return reportError(
  496. ImportLoc, diag::err_dep_source_minimizer_unexpected_tokens_at_import);
  497. skipNewline(First, End);
  498. return false;
  499. }
  500. bool Minimizer::lexModule(const char *&First, const char *const End) {
  501. IdInfo Id = lexIdentifier(First, End);
  502. First = Id.Last;
  503. bool Export = false;
  504. if (Id.Name == "export") {
  505. Export = true;
  506. skipWhitespace(First, End);
  507. if (!isIdentifierBody(*First)) {
  508. skipLine(First, End);
  509. return false;
  510. }
  511. Id = lexIdentifier(First, End);
  512. First = Id.Last;
  513. }
  514. if (Id.Name != "module" && Id.Name != "import") {
  515. skipLine(First, End);
  516. return false;
  517. }
  518. skipWhitespace(First, End);
  519. // Ignore this as a module directive if the next character can't be part of
  520. // an import.
  521. switch (*First) {
  522. case ':':
  523. case '<':
  524. case '"':
  525. break;
  526. default:
  527. if (!isIdentifierBody(*First)) {
  528. skipLine(First, End);
  529. return false;
  530. }
  531. }
  532. if (Export) {
  533. makeToken(cxx_export_decl);
  534. append("export ");
  535. }
  536. if (Id.Name == "module")
  537. makeToken(cxx_module_decl);
  538. else
  539. makeToken(cxx_import_decl);
  540. append(Id.Name);
  541. append(" ");
  542. printToNewline(First, End);
  543. append("\n");
  544. return false;
  545. }
  546. bool Minimizer::lexDefine(const char *&First, const char *const End) {
  547. makeToken(pp_define);
  548. append("#define ");
  549. skipWhitespace(First, End);
  550. if (!isIdentifierHead(*First))
  551. return reportError(First, diag::err_pp_macro_not_identifier);
  552. IdInfo Id = lexIdentifier(First, End);
  553. const char *Last = Id.Last;
  554. append(Id.Name);
  555. if (Last == End)
  556. return false;
  557. if (*Last == '(') {
  558. size_t Size = Out.size();
  559. if (printMacroArgs(Last, End)) {
  560. // Be robust to bad macro arguments, since they can show up in disabled
  561. // code.
  562. Out.resize(Size);
  563. append("(/* invalid */\n");
  564. skipLine(Last, End);
  565. return false;
  566. }
  567. }
  568. skipWhitespace(Last, End);
  569. if (Last == End)
  570. return false;
  571. if (!isVerticalWhitespace(*Last))
  572. put(' ');
  573. printDirectiveBody(Last, End);
  574. First = Last;
  575. return false;
  576. }
  577. bool Minimizer::lexPragma(const char *&First, const char *const End) {
  578. // #pragma.
  579. skipWhitespace(First, End);
  580. if (First == End || !isIdentifierHead(*First))
  581. return false;
  582. IdInfo FoundId = lexIdentifier(First, End);
  583. First = FoundId.Last;
  584. if (FoundId.Name == "once") {
  585. // #pragma once
  586. skipLine(First, End);
  587. makeToken(pp_pragma_once);
  588. append("#pragma once\n");
  589. return false;
  590. }
  591. if (FoundId.Name != "clang") {
  592. skipLine(First, End);
  593. return false;
  594. }
  595. // #pragma clang.
  596. if (!isNextIdentifier("module", First, End)) {
  597. skipLine(First, End);
  598. return false;
  599. }
  600. // #pragma clang module.
  601. if (!isNextIdentifier("import", First, End)) {
  602. skipLine(First, End);
  603. return false;
  604. }
  605. // #pragma clang module import.
  606. makeToken(pp_pragma_import);
  607. append("#pragma clang module import ");
  608. printDirectiveBody(First, End);
  609. return false;
  610. }
  611. bool Minimizer::lexEndif(const char *&First, const char *const End) {
  612. // Strip out "#else" if it's empty.
  613. if (top() == pp_else)
  614. popToken();
  615. // Strip out "#elif" if they're empty.
  616. while (top() == pp_elif)
  617. popToken();
  618. // If "#if" is empty, strip it and skip the "#endif".
  619. if (top() == pp_if || top() == pp_ifdef || top() == pp_ifndef) {
  620. popToken();
  621. skipLine(First, End);
  622. return false;
  623. }
  624. return lexDefault(pp_endif, "endif", First, End);
  625. }
  626. bool Minimizer::lexDefault(TokenKind Kind, StringRef Directive,
  627. const char *&First, const char *const End) {
  628. makeToken(Kind);
  629. put('#').append(Directive).put(' ');
  630. printDirectiveBody(First, End);
  631. return false;
  632. }
  /// \returns True if \p First is one of the characters that can begin a line
  /// the minimizer looks at more closely: '#', '@', 'i', 'e' or 'm'.
  static bool isStartOfRelevantLine(char First) {
    return First == '#' || First == '@' || First == 'i' || First == 'e' ||
           First == 'm';
  }
  644. bool Minimizer::lexPPLine(const char *&First, const char *const End) {
  645. assert(First != End);
  646. skipWhitespace(First, End);
  647. assert(First <= End);
  648. if (First == End)
  649. return false;
  650. if (!isStartOfRelevantLine(*First)) {
  651. skipLine(First, End);
  652. assert(First <= End);
  653. return false;
  654. }
  655. // Handle "@import".
  656. if (*First == '@')
  657. return lexAt(First, End);
  658. if (*First == 'i' || *First == 'e' || *First == 'm')
  659. return lexModule(First, End);
  660. // Handle preprocessing directives.
  661. ++First; // Skip over '#'.
  662. skipWhitespace(First, End);
  663. if (First == End)
  664. return reportError(First, diag::err_pp_expected_eol);
  665. if (!isIdentifierHead(*First)) {
  666. skipLine(First, End);
  667. return false;
  668. }
  669. // Figure out the token.
  670. IdInfo Id = lexIdentifier(First, End);
  671. First = Id.Last;
  672. auto Kind = llvm::StringSwitch<TokenKind>(Id.Name)
  673. .Case("include", pp_include)
  674. .Case("__include_macros", pp___include_macros)
  675. .Case("define", pp_define)
  676. .Case("undef", pp_undef)
  677. .Case("import", pp_import)
  678. .Case("include_next", pp_include_next)
  679. .Case("if", pp_if)
  680. .Case("ifdef", pp_ifdef)
  681. .Case("ifndef", pp_ifndef)
  682. .Case("elif", pp_elif)
  683. .Case("else", pp_else)
  684. .Case("endif", pp_endif)
  685. .Case("pragma", pp_pragma_import)
  686. .Default(pp_none);
  687. if (Kind == pp_none) {
  688. skipDirective(Id.Name, First, End);
  689. return false;
  690. }
  691. if (Kind == pp_endif)
  692. return lexEndif(First, End);
  693. if (Kind == pp_define)
  694. return lexDefine(First, End);
  695. if (Kind == pp_pragma_import)
  696. return lexPragma(First, End);
  697. // Everything else.
  698. return lexDefault(Kind, Id.Name, First, End);
  699. }
  /// Advances \p First past a UTF-8 byte order mark (EF BB BF) if the buffer
  /// starts with one.
  static void skipUTF8ByteOrderMark(const char *&First, const char *const End) {
    if (End - First < 3)
      return;
    const bool HasBOM =
        First[0] == '\xef' && First[1] == '\xbb' && First[2] == '\xbf';
    if (HasBOM)
      First += 3;
  }
  705. bool Minimizer::minimizeImpl(const char *First, const char *const End) {
  706. skipUTF8ByteOrderMark(First, End);
  707. while (First != End)
  708. if (lexPPLine(First, End))
  709. return true;
  710. return false;
  711. }
  712. bool Minimizer::minimize() {
  713. bool Error = minimizeImpl(Input.begin(), Input.end());
  714. if (!Error) {
  715. // Add a trailing newline and an EOF on success.
  716. if (!Out.empty() && Out.back() != '\n')
  717. Out.push_back('\n');
  718. makeToken(pp_eof);
  719. }
  720. // Null-terminate the output. This way the memory buffer that's passed to
  721. // Clang will not have to worry about the terminating '\0'.
  722. Out.push_back(0);
  723. Out.pop_back();
  724. return Error;
  725. }
  726. bool clang::minimizeSourceToDependencyDirectives(
  727. StringRef Input, SmallVectorImpl<char> &Output,
  728. SmallVectorImpl<Token> &Tokens, DiagnosticsEngine *Diags,
  729. SourceLocation InputSourceLoc) {
  730. Output.clear();
  731. Tokens.clear();
  732. return Minimizer(Output, Tokens, Input, Diags, InputSourceLoc).minimize();
  733. }