DependencyDirectivesSourceMinimizer.cpp 26 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955
  1. //===- DependencyDirectivesSourceMinimizer.cpp - -------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// This is the implementation for minimizing header and source files to the
  11. /// minimum necessary preprocessor directives for evaluating includes. It
  12. /// reduces the source down to #define, #include, #import, @import, and any
  13. /// conditional preprocessor logic that contains one of those.
  14. ///
  15. //===----------------------------------------------------------------------===//
  16. #include "clang/Lex/DependencyDirectivesSourceMinimizer.h"
  17. #include "clang/Basic/CharInfo.h"
  18. #include "clang/Basic/Diagnostic.h"
  19. #include "clang/Lex/LexDiagnostic.h"
  20. #include "llvm/ADT/StringSwitch.h"
  21. #include "llvm/Support/MemoryBuffer.h"
  22. using namespace llvm;
  23. using namespace clang;
  24. using namespace clang::minimize_source_to_dependency_directives;
  25. namespace {
  26. struct Minimizer {
  27. /// Minimized output.
  28. SmallVectorImpl<char> &Out;
  29. /// The known tokens encountered during the minimization.
  30. SmallVectorImpl<Token> &Tokens;
  31. Minimizer(SmallVectorImpl<char> &Out, SmallVectorImpl<Token> &Tokens,
  32. StringRef Input, DiagnosticsEngine *Diags,
  33. SourceLocation InputSourceLoc)
  34. : Out(Out), Tokens(Tokens), Input(Input), Diags(Diags),
  35. InputSourceLoc(InputSourceLoc) {}
  36. /// Lex the provided source and emit the minimized output.
  37. ///
  38. /// \returns True on error.
  39. bool minimize();
  40. private:
  41. struct IdInfo {
  42. const char *Last;
  43. StringRef Name;
  44. };
  45. /// Lex an identifier.
  46. ///
  47. /// \pre First points at a valid identifier head.
  48. LLVM_NODISCARD IdInfo lexIdentifier(const char *First, const char *const End);
  49. LLVM_NODISCARD bool isNextIdentifier(StringRef Id, const char *&First,
  50. const char *const End);
  51. LLVM_NODISCARD bool minimizeImpl(const char *First, const char *const End);
  52. LLVM_NODISCARD bool lexPPLine(const char *&First, const char *const End);
  53. LLVM_NODISCARD bool lexAt(const char *&First, const char *const End);
  54. LLVM_NODISCARD bool lexModule(const char *&First, const char *const End);
  55. LLVM_NODISCARD bool lexDefine(const char *&First, const char *const End);
  56. LLVM_NODISCARD bool lexPragma(const char *&First, const char *const End);
  57. LLVM_NODISCARD bool lexEndif(const char *&First, const char *const End);
  58. LLVM_NODISCARD bool lexDefault(TokenKind Kind, StringRef Directive,
  59. const char *&First, const char *const End);
  60. Token &makeToken(TokenKind K) {
  61. Tokens.emplace_back(K, Out.size());
  62. return Tokens.back();
  63. }
  64. void popToken() {
  65. Out.resize(Tokens.back().Offset);
  66. Tokens.pop_back();
  67. }
  68. TokenKind top() const { return Tokens.empty() ? pp_none : Tokens.back().K; }
  69. Minimizer &put(char Byte) {
  70. Out.push_back(Byte);
  71. return *this;
  72. }
  73. Minimizer &append(StringRef S) { return append(S.begin(), S.end()); }
  74. Minimizer &append(const char *First, const char *Last) {
  75. Out.append(First, Last);
  76. return *this;
  77. }
  78. void printToNewline(const char *&First, const char *const End);
  79. void printAdjacentModuleNameParts(const char *&First, const char *const End);
  80. LLVM_NODISCARD bool printAtImportBody(const char *&First,
  81. const char *const End);
  82. void printDirectiveBody(const char *&First, const char *const End);
  83. void printAdjacentMacroArgs(const char *&First, const char *const End);
  84. LLVM_NODISCARD bool printMacroArgs(const char *&First, const char *const End);
  85. /// Reports a diagnostic if the diagnostic engine is provided. Always returns
  86. /// true at the end.
  87. bool reportError(const char *CurPtr, unsigned Err);
  88. StringMap<char> SplitIds;
  89. StringRef Input;
  90. DiagnosticsEngine *Diags;
  91. SourceLocation InputSourceLoc;
  92. };
  93. } // end anonymous namespace
  94. bool Minimizer::reportError(const char *CurPtr, unsigned Err) {
  95. if (!Diags)
  96. return true;
  97. assert(CurPtr >= Input.data() && "invalid buffer ptr");
  98. Diags->Report(InputSourceLoc.getLocWithOffset(CurPtr - Input.data()), Err);
  99. return true;
  100. }
  101. static void skipOverSpaces(const char *&First, const char *const End) {
  102. while (First != End && isHorizontalWhitespace(*First))
  103. ++First;
  104. }
  105. LLVM_NODISCARD static bool isRawStringLiteral(const char *First,
  106. const char *Current) {
  107. assert(First <= Current);
  108. // Check if we can even back up.
  109. if (*Current != '"' || First == Current)
  110. return false;
  111. // Check for an "R".
  112. --Current;
  113. if (*Current != 'R')
  114. return false;
  115. if (First == Current || !isIdentifierBody(*--Current))
  116. return true;
  117. // Check for a prefix of "u", "U", or "L".
  118. if (*Current == 'u' || *Current == 'U' || *Current == 'L')
  119. return First == Current || !isIdentifierBody(*--Current);
  120. // Check for a prefix of "u8".
  121. if (*Current != '8' || First == Current || *Current-- != 'u')
  122. return false;
  123. return First == Current || !isIdentifierBody(*--Current);
  124. }
  125. static void skipRawString(const char *&First, const char *const End) {
  126. assert(First[0] == '"');
  127. assert(First[-1] == 'R');
  128. const char *Last = ++First;
  129. while (Last != End && *Last != '(')
  130. ++Last;
  131. if (Last == End) {
  132. First = Last; // Hit the end... just give up.
  133. return;
  134. }
  135. StringRef Terminator(First, Last - First);
  136. for (;;) {
  137. // Move First to just past the next ")".
  138. First = Last;
  139. while (First != End && *First != ')')
  140. ++First;
  141. if (First == End)
  142. return;
  143. ++First;
  144. // Look ahead for the terminator sequence.
  145. Last = First;
  146. while (Last != End && size_t(Last - First) < Terminator.size() &&
  147. Terminator[Last - First] == *Last)
  148. ++Last;
  149. // Check if we hit it (or the end of the file).
  150. if (Last == End) {
  151. First = Last;
  152. return;
  153. }
  154. if (size_t(Last - First) < Terminator.size())
  155. continue;
  156. if (*Last != '"')
  157. continue;
  158. First = Last + 1;
  159. return;
  160. }
  161. }
  162. // Returns the length of EOL, either 0 (no end-of-line), 1 (\n) or 2 (\r\n)
  163. static unsigned isEOL(const char *First, const char *const End) {
  164. if (First == End)
  165. return 0;
  166. if (End - First > 1 && isVerticalWhitespace(First[0]) &&
  167. isVerticalWhitespace(First[1]) && First[0] != First[1])
  168. return 2;
  169. return !!isVerticalWhitespace(First[0]);
  170. }
  171. static void skipString(const char *&First, const char *const End) {
  172. assert(*First == '\'' || *First == '"' || *First == '<');
  173. const char Terminator = *First == '<' ? '>' : *First;
  174. for (++First; First != End && *First != Terminator; ++First) {
  175. // String and character literals don't extend past the end of the line.
  176. if (isVerticalWhitespace(*First))
  177. return;
  178. if (*First != '\\')
  179. continue;
  180. // Skip past backslash to the next character. This ensures that the
  181. // character right after it is skipped as well, which matters if it's
  182. // the terminator.
  183. if (++First == End)
  184. return;
  185. if (!isWhitespace(*First))
  186. continue;
  187. // Whitespace after the backslash might indicate a line continuation.
  188. const char *FirstAfterBackslashPastSpace = First;
  189. skipOverSpaces(FirstAfterBackslashPastSpace, End);
  190. if (unsigned NLSize = isEOL(FirstAfterBackslashPastSpace, End)) {
  191. // Advance the character pointer to the next line for the next
  192. // iteration.
  193. First = FirstAfterBackslashPastSpace + NLSize - 1;
  194. }
  195. }
  196. if (First != End)
  197. ++First; // Finish off the string.
  198. }
  199. // Returns the length of the skipped newline
  200. static unsigned skipNewline(const char *&First, const char *End) {
  201. if (First == End)
  202. return 0;
  203. assert(isVerticalWhitespace(*First));
  204. unsigned Len = isEOL(First, End);
  205. assert(Len && "expected newline");
  206. First += Len;
  207. return Len;
  208. }
  209. static bool wasLineContinuation(const char *First, unsigned EOLLen) {
  210. return *(First - (int)EOLLen - 1) == '\\';
  211. }
  212. static void skipToNewlineRaw(const char *&First, const char *const End) {
  213. for (;;) {
  214. if (First == End)
  215. return;
  216. unsigned Len = isEOL(First, End);
  217. if (Len)
  218. return;
  219. do {
  220. if (++First == End)
  221. return;
  222. Len = isEOL(First, End);
  223. } while (!Len);
  224. if (First[-1] != '\\')
  225. return;
  226. First += Len;
  227. // Keep skipping lines...
  228. }
  229. }
  230. static const char *findLastNonSpace(const char *First, const char *Last) {
  231. assert(First <= Last);
  232. while (First != Last && isHorizontalWhitespace(Last[-1]))
  233. --Last;
  234. return Last;
  235. }
  236. static const char *findFirstTrailingSpace(const char *First,
  237. const char *Last) {
  238. const char *LastNonSpace = findLastNonSpace(First, Last);
  239. if (Last == LastNonSpace)
  240. return Last;
  241. assert(isHorizontalWhitespace(LastNonSpace[0]));
  242. return LastNonSpace + 1;
  243. }
  244. static void skipLineComment(const char *&First, const char *const End) {
  245. assert(First[0] == '/' && First[1] == '/');
  246. First += 2;
  247. skipToNewlineRaw(First, End);
  248. }
  249. static void skipBlockComment(const char *&First, const char *const End) {
  250. assert(First[0] == '/' && First[1] == '*');
  251. if (End - First < 4) {
  252. First = End;
  253. return;
  254. }
  255. for (First += 3; First != End; ++First)
  256. if (First[-1] == '*' && First[0] == '/') {
  257. ++First;
  258. return;
  259. }
  260. }
  261. /// \returns True if the current single quotation mark character is a C++ 14
  262. /// digit separator.
  263. static bool isQuoteCppDigitSeparator(const char *const Start,
  264. const char *const Cur,
  265. const char *const End) {
  266. assert(*Cur == '\'' && "expected quotation character");
  267. // skipLine called in places where we don't expect a valid number
  268. // body before `start` on the same line, so always return false at the start.
  269. if (Start == Cur)
  270. return false;
  271. // The previous character must be a valid PP number character.
  272. // Make sure that the L, u, U, u8 prefixes don't get marked as a
  273. // separator though.
  274. char Prev = *(Cur - 1);
  275. if (Prev == 'L' || Prev == 'U' || Prev == 'u')
  276. return false;
  277. if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u')
  278. return false;
  279. if (!isPreprocessingNumberBody(Prev))
  280. return false;
  281. // The next character should be a valid identifier body character.
  282. return (Cur + 1) < End && isIdentifierBody(*(Cur + 1));
  283. }
  284. static void skipLine(const char *&First, const char *const End) {
  285. for (;;) {
  286. assert(First <= End);
  287. if (First == End)
  288. return;
  289. if (isVerticalWhitespace(*First)) {
  290. skipNewline(First, End);
  291. return;
  292. }
  293. const char *Start = First;
  294. while (First != End && !isVerticalWhitespace(*First)) {
  295. // Iterate over strings correctly to avoid comments and newlines.
  296. if (*First == '"' ||
  297. (*First == '\'' && !isQuoteCppDigitSeparator(Start, First, End))) {
  298. if (isRawStringLiteral(Start, First))
  299. skipRawString(First, End);
  300. else
  301. skipString(First, End);
  302. continue;
  303. }
  304. // Iterate over comments correctly.
  305. if (*First != '/' || End - First < 2) {
  306. ++First;
  307. continue;
  308. }
  309. if (First[1] == '/') {
  310. // "//...".
  311. skipLineComment(First, End);
  312. continue;
  313. }
  314. if (First[1] != '*') {
  315. ++First;
  316. continue;
  317. }
  318. // "/*...*/".
  319. skipBlockComment(First, End);
  320. }
  321. if (First == End)
  322. return;
  323. // Skip over the newline.
  324. unsigned Len = skipNewline(First, End);
  325. if (!wasLineContinuation(First, Len)) // Continue past line-continuations.
  326. break;
  327. }
  328. }
  329. static void skipDirective(StringRef Name, const char *&First,
  330. const char *const End) {
  331. if (llvm::StringSwitch<bool>(Name)
  332. .Case("warning", true)
  333. .Case("error", true)
  334. .Default(false))
  335. // Do not process quotes or comments.
  336. skipToNewlineRaw(First, End);
  337. else
  338. skipLine(First, End);
  339. }
  340. void Minimizer::printToNewline(const char *&First, const char *const End) {
  341. while (First != End && !isVerticalWhitespace(*First)) {
  342. const char *Last = First;
  343. do {
  344. // Iterate over strings correctly to avoid comments and newlines.
  345. if (*Last == '"' || *Last == '\'' ||
  346. (*Last == '<' && top() == pp_include)) {
  347. if (LLVM_UNLIKELY(isRawStringLiteral(First, Last)))
  348. skipRawString(Last, End);
  349. else
  350. skipString(Last, End);
  351. continue;
  352. }
  353. if (*Last != '/' || End - Last < 2) {
  354. ++Last;
  355. continue; // Gather the rest up to print verbatim.
  356. }
  357. if (Last[1] != '/' && Last[1] != '*') {
  358. ++Last;
  359. continue;
  360. }
  361. // Deal with "//..." and "/*...*/".
  362. append(First, findFirstTrailingSpace(First, Last));
  363. First = Last;
  364. if (Last[1] == '/') {
  365. skipLineComment(First, End);
  366. return;
  367. }
  368. put(' ');
  369. skipBlockComment(First, End);
  370. skipOverSpaces(First, End);
  371. Last = First;
  372. } while (Last != End && !isVerticalWhitespace(*Last));
  373. // Print out the string.
  374. const char *LastBeforeTrailingSpace = findLastNonSpace(First, Last);
  375. if (Last == End || LastBeforeTrailingSpace == First ||
  376. LastBeforeTrailingSpace[-1] != '\\') {
  377. append(First, LastBeforeTrailingSpace);
  378. First = Last;
  379. skipNewline(First, End);
  380. return;
  381. }
  382. // Print up to the backslash, backing up over spaces. Preserve at least one
  383. // space, as the space matters when tokens are separated by a line
  384. // continuation.
  385. append(First, findFirstTrailingSpace(
  386. First, LastBeforeTrailingSpace - 1));
  387. First = Last;
  388. skipNewline(First, End);
  389. skipOverSpaces(First, End);
  390. }
  391. }
  392. static void skipWhitespace(const char *&First, const char *const End) {
  393. for (;;) {
  394. assert(First <= End);
  395. skipOverSpaces(First, End);
  396. if (End - First < 2)
  397. return;
  398. if (First[0] == '\\' && isVerticalWhitespace(First[1])) {
  399. skipNewline(++First, End);
  400. continue;
  401. }
  402. // Check for a non-comment character.
  403. if (First[0] != '/')
  404. return;
  405. // "// ...".
  406. if (First[1] == '/') {
  407. skipLineComment(First, End);
  408. return;
  409. }
  410. // Cannot be a comment.
  411. if (First[1] != '*')
  412. return;
  413. // "/*...*/".
  414. skipBlockComment(First, End);
  415. }
  416. }
  417. void Minimizer::printAdjacentModuleNameParts(const char *&First,
  418. const char *const End) {
  419. // Skip over parts of the body.
  420. const char *Last = First;
  421. do
  422. ++Last;
  423. while (Last != End && (isIdentifierBody(*Last) || *Last == '.'));
  424. append(First, Last);
  425. First = Last;
  426. }
  427. bool Minimizer::printAtImportBody(const char *&First, const char *const End) {
  428. for (;;) {
  429. skipWhitespace(First, End);
  430. if (First == End)
  431. return true;
  432. if (isVerticalWhitespace(*First)) {
  433. skipNewline(First, End);
  434. continue;
  435. }
  436. // Found a semicolon.
  437. if (*First == ';') {
  438. put(*First++).put('\n');
  439. return false;
  440. }
  441. // Don't handle macro expansions inside @import for now.
  442. if (!isIdentifierBody(*First) && *First != '.')
  443. return true;
  444. printAdjacentModuleNameParts(First, End);
  445. }
  446. }
  447. void Minimizer::printDirectiveBody(const char *&First, const char *const End) {
  448. skipWhitespace(First, End); // Skip initial whitespace.
  449. printToNewline(First, End);
  450. while (Out.back() == ' ')
  451. Out.pop_back();
  452. put('\n');
  453. }
  454. LLVM_NODISCARD static const char *lexRawIdentifier(const char *First,
  455. const char *const End) {
  456. assert(isIdentifierBody(*First) && "invalid identifer");
  457. const char *Last = First + 1;
  458. while (Last != End && isIdentifierBody(*Last))
  459. ++Last;
  460. return Last;
  461. }
  462. LLVM_NODISCARD static const char *
  463. getIdentifierContinuation(const char *First, const char *const End) {
  464. if (End - First < 3 || First[0] != '\\' || !isVerticalWhitespace(First[1]))
  465. return nullptr;
  466. ++First;
  467. skipNewline(First, End);
  468. if (First == End)
  469. return nullptr;
  470. return isIdentifierBody(First[0]) ? First : nullptr;
  471. }
  472. Minimizer::IdInfo Minimizer::lexIdentifier(const char *First,
  473. const char *const End) {
  474. const char *Last = lexRawIdentifier(First, End);
  475. const char *Next = getIdentifierContinuation(Last, End);
  476. if (LLVM_LIKELY(!Next))
  477. return IdInfo{Last, StringRef(First, Last - First)};
  478. // Slow path, where identifiers are split over lines.
  479. SmallVector<char, 64> Id(First, Last);
  480. while (Next) {
  481. Last = lexRawIdentifier(Next, End);
  482. Id.append(Next, Last);
  483. Next = getIdentifierContinuation(Last, End);
  484. }
  485. return IdInfo{
  486. Last,
  487. SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()};
  488. }
  489. void Minimizer::printAdjacentMacroArgs(const char *&First,
  490. const char *const End) {
  491. // Skip over parts of the body.
  492. const char *Last = First;
  493. do
  494. ++Last;
  495. while (Last != End &&
  496. (isIdentifierBody(*Last) || *Last == '.' || *Last == ','));
  497. append(First, Last);
  498. First = Last;
  499. }
  500. bool Minimizer::printMacroArgs(const char *&First, const char *const End) {
  501. assert(*First == '(');
  502. put(*First++);
  503. for (;;) {
  504. skipWhitespace(First, End);
  505. if (First == End)
  506. return true;
  507. if (*First == ')') {
  508. put(*First++);
  509. return false;
  510. }
  511. // This is intentionally fairly liberal.
  512. if (!(isIdentifierBody(*First) || *First == '.' || *First == ','))
  513. return true;
  514. printAdjacentMacroArgs(First, End);
  515. }
  516. }
  517. /// Looks for an identifier starting from Last.
  518. ///
  519. /// Updates "First" to just past the next identifier, if any. Returns true iff
  520. /// the identifier matches "Id".
  521. bool Minimizer::isNextIdentifier(StringRef Id, const char *&First,
  522. const char *const End) {
  523. skipWhitespace(First, End);
  524. if (First == End || !isIdentifierHead(*First))
  525. return false;
  526. IdInfo FoundId = lexIdentifier(First, End);
  527. First = FoundId.Last;
  528. return FoundId.Name == Id;
  529. }
  530. bool Minimizer::lexAt(const char *&First, const char *const End) {
  531. // Handle "@import".
  532. const char *ImportLoc = First++;
  533. if (!isNextIdentifier("import", First, End)) {
  534. skipLine(First, End);
  535. return false;
  536. }
  537. makeToken(decl_at_import);
  538. append("@import ");
  539. if (printAtImportBody(First, End))
  540. return reportError(
  541. ImportLoc, diag::err_dep_source_minimizer_missing_sema_after_at_import);
  542. skipWhitespace(First, End);
  543. if (First == End)
  544. return false;
  545. if (!isVerticalWhitespace(*First))
  546. return reportError(
  547. ImportLoc, diag::err_dep_source_minimizer_unexpected_tokens_at_import);
  548. skipNewline(First, End);
  549. return false;
  550. }
  551. bool Minimizer::lexModule(const char *&First, const char *const End) {
  552. IdInfo Id = lexIdentifier(First, End);
  553. First = Id.Last;
  554. bool Export = false;
  555. if (Id.Name == "export") {
  556. Export = true;
  557. skipWhitespace(First, End);
  558. if (!isIdentifierBody(*First)) {
  559. skipLine(First, End);
  560. return false;
  561. }
  562. Id = lexIdentifier(First, End);
  563. First = Id.Last;
  564. }
  565. if (Id.Name != "module" && Id.Name != "import") {
  566. skipLine(First, End);
  567. return false;
  568. }
  569. skipWhitespace(First, End);
  570. // Ignore this as a module directive if the next character can't be part of
  571. // an import.
  572. switch (*First) {
  573. case ':':
  574. case '<':
  575. case '"':
  576. break;
  577. default:
  578. if (!isIdentifierBody(*First)) {
  579. skipLine(First, End);
  580. return false;
  581. }
  582. }
  583. if (Export) {
  584. makeToken(cxx_export_decl);
  585. append("export ");
  586. }
  587. if (Id.Name == "module")
  588. makeToken(cxx_module_decl);
  589. else
  590. makeToken(cxx_import_decl);
  591. append(Id.Name);
  592. append(" ");
  593. printToNewline(First, End);
  594. append("\n");
  595. return false;
  596. }
  597. bool Minimizer::lexDefine(const char *&First, const char *const End) {
  598. makeToken(pp_define);
  599. append("#define ");
  600. skipWhitespace(First, End);
  601. if (!isIdentifierHead(*First))
  602. return reportError(First, diag::err_pp_macro_not_identifier);
  603. IdInfo Id = lexIdentifier(First, End);
  604. const char *Last = Id.Last;
  605. append(Id.Name);
  606. if (Last == End)
  607. return false;
  608. if (*Last == '(') {
  609. size_t Size = Out.size();
  610. if (printMacroArgs(Last, End)) {
  611. // Be robust to bad macro arguments, since they can show up in disabled
  612. // code.
  613. Out.resize(Size);
  614. append("(/* invalid */\n");
  615. skipLine(Last, End);
  616. return false;
  617. }
  618. }
  619. skipWhitespace(Last, End);
  620. if (Last == End)
  621. return false;
  622. if (!isVerticalWhitespace(*Last))
  623. put(' ');
  624. printDirectiveBody(Last, End);
  625. First = Last;
  626. return false;
  627. }
  628. bool Minimizer::lexPragma(const char *&First, const char *const End) {
  629. // #pragma.
  630. skipWhitespace(First, End);
  631. if (First == End || !isIdentifierHead(*First))
  632. return false;
  633. IdInfo FoundId = lexIdentifier(First, End);
  634. First = FoundId.Last;
  635. if (FoundId.Name == "once") {
  636. // #pragma once
  637. skipLine(First, End);
  638. makeToken(pp_pragma_once);
  639. append("#pragma once\n");
  640. return false;
  641. }
  642. if (FoundId.Name != "clang") {
  643. skipLine(First, End);
  644. return false;
  645. }
  646. // #pragma clang.
  647. if (!isNextIdentifier("module", First, End)) {
  648. skipLine(First, End);
  649. return false;
  650. }
  651. // #pragma clang module.
  652. if (!isNextIdentifier("import", First, End)) {
  653. skipLine(First, End);
  654. return false;
  655. }
  656. // #pragma clang module import.
  657. makeToken(pp_pragma_import);
  658. append("#pragma clang module import ");
  659. printDirectiveBody(First, End);
  660. return false;
  661. }
  662. bool Minimizer::lexEndif(const char *&First, const char *const End) {
  663. // Strip out "#else" if it's empty.
  664. if (top() == pp_else)
  665. popToken();
  666. // Strip out "#elif" if they're empty.
  667. while (top() == pp_elif)
  668. popToken();
  669. // If "#if" is empty, strip it and skip the "#endif".
  670. if (top() == pp_if || top() == pp_ifdef || top() == pp_ifndef) {
  671. popToken();
  672. skipLine(First, End);
  673. return false;
  674. }
  675. return lexDefault(pp_endif, "endif", First, End);
  676. }
  677. bool Minimizer::lexDefault(TokenKind Kind, StringRef Directive,
  678. const char *&First, const char *const End) {
  679. makeToken(Kind);
  680. put('#').append(Directive).put(' ');
  681. printDirectiveBody(First, End);
  682. return false;
  683. }
  684. static bool isStartOfRelevantLine(char First) {
  685. switch (First) {
  686. case '#':
  687. case '@':
  688. case 'i':
  689. case 'e':
  690. case 'm':
  691. return true;
  692. }
  693. return false;
  694. }
  695. bool Minimizer::lexPPLine(const char *&First, const char *const End) {
  696. assert(First != End);
  697. skipWhitespace(First, End);
  698. assert(First <= End);
  699. if (First == End)
  700. return false;
  701. if (!isStartOfRelevantLine(*First)) {
  702. skipLine(First, End);
  703. assert(First <= End);
  704. return false;
  705. }
  706. // Handle "@import".
  707. if (*First == '@')
  708. return lexAt(First, End);
  709. if (*First == 'i' || *First == 'e' || *First == 'm')
  710. return lexModule(First, End);
  711. // Handle preprocessing directives.
  712. ++First; // Skip over '#'.
  713. skipWhitespace(First, End);
  714. if (First == End)
  715. return reportError(First, diag::err_pp_expected_eol);
  716. if (!isIdentifierHead(*First)) {
  717. skipLine(First, End);
  718. return false;
  719. }
  720. // Figure out the token.
  721. IdInfo Id = lexIdentifier(First, End);
  722. First = Id.Last;
  723. auto Kind = llvm::StringSwitch<TokenKind>(Id.Name)
  724. .Case("include", pp_include)
  725. .Case("__include_macros", pp___include_macros)
  726. .Case("define", pp_define)
  727. .Case("undef", pp_undef)
  728. .Case("import", pp_import)
  729. .Case("include_next", pp_include_next)
  730. .Case("if", pp_if)
  731. .Case("ifdef", pp_ifdef)
  732. .Case("ifndef", pp_ifndef)
  733. .Case("elif", pp_elif)
  734. .Case("else", pp_else)
  735. .Case("endif", pp_endif)
  736. .Case("pragma", pp_pragma_import)
  737. .Default(pp_none);
  738. if (Kind == pp_none) {
  739. skipDirective(Id.Name, First, End);
  740. return false;
  741. }
  742. if (Kind == pp_endif)
  743. return lexEndif(First, End);
  744. if (Kind == pp_define)
  745. return lexDefine(First, End);
  746. if (Kind == pp_pragma_import)
  747. return lexPragma(First, End);
  748. // Everything else.
  749. return lexDefault(Kind, Id.Name, First, End);
  750. }
  751. static void skipUTF8ByteOrderMark(const char *&First, const char *const End) {
  752. if ((End - First) >= 3 && First[0] == '\xef' && First[1] == '\xbb' &&
  753. First[2] == '\xbf')
  754. First += 3;
  755. }
  756. bool Minimizer::minimizeImpl(const char *First, const char *const End) {
  757. skipUTF8ByteOrderMark(First, End);
  758. while (First != End)
  759. if (lexPPLine(First, End))
  760. return true;
  761. return false;
  762. }
  763. bool Minimizer::minimize() {
  764. bool Error = minimizeImpl(Input.begin(), Input.end());
  765. if (!Error) {
  766. // Add a trailing newline and an EOF on success.
  767. if (!Out.empty() && Out.back() != '\n')
  768. Out.push_back('\n');
  769. makeToken(pp_eof);
  770. }
  771. // Null-terminate the output. This way the memory buffer that's passed to
  772. // Clang will not have to worry about the terminating '\0'.
  773. Out.push_back(0);
  774. Out.pop_back();
  775. return Error;
  776. }
  777. bool clang::minimize_source_to_dependency_directives::computeSkippedRanges(
  778. ArrayRef<Token> Input, llvm::SmallVectorImpl<SkippedRange> &Range) {
  779. struct Directive {
  780. enum DirectiveKind {
  781. If, // if/ifdef/ifndef
  782. Else // elif,else
  783. };
  784. int Offset;
  785. DirectiveKind Kind;
  786. };
  787. llvm::SmallVector<Directive, 32> Offsets;
  788. for (const Token &T : Input) {
  789. switch (T.K) {
  790. case pp_if:
  791. case pp_ifdef:
  792. case pp_ifndef:
  793. Offsets.push_back({T.Offset, Directive::If});
  794. break;
  795. case pp_elif:
  796. case pp_else: {
  797. if (Offsets.empty())
  798. return true;
  799. int PreviousOffset = Offsets.back().Offset;
  800. Range.push_back({PreviousOffset, T.Offset - PreviousOffset});
  801. Offsets.push_back({T.Offset, Directive::Else});
  802. break;
  803. }
  804. case pp_endif: {
  805. if (Offsets.empty())
  806. return true;
  807. int PreviousOffset = Offsets.back().Offset;
  808. Range.push_back({PreviousOffset, T.Offset - PreviousOffset});
  809. do {
  810. Directive::DirectiveKind Kind = Offsets.pop_back_val().Kind;
  811. if (Kind == Directive::If)
  812. break;
  813. } while (!Offsets.empty());
  814. break;
  815. }
  816. default:
  817. break;
  818. }
  819. }
  820. return false;
  821. }
  822. bool clang::minimizeSourceToDependencyDirectives(
  823. StringRef Input, SmallVectorImpl<char> &Output,
  824. SmallVectorImpl<Token> &Tokens, DiagnosticsEngine *Diags,
  825. SourceLocation InputSourceLoc) {
  826. Output.clear();
  827. Tokens.clear();
  828. return Minimizer(Output, Tokens, Input, Diags, InputSourceLoc).minimize();
  829. }