UnwrappedLineFormatter.cpp 36 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950
  1. //===--- UnwrappedLineFormatter.cpp - Format C++ code ---------------------===//
  2. //
  3. // The LLVM Compiler Infrastructure
  4. //
  5. // This file is distributed under the University of Illinois Open Source
  6. // License. See LICENSE.TXT for details.
  7. //
  8. //===----------------------------------------------------------------------===//
  9. #include "UnwrappedLineFormatter.h"
  10. #include "WhitespaceManager.h"
  11. #include "llvm/Support/Debug.h"
  12. #define DEBUG_TYPE "format-formatter"
  13. namespace clang {
  14. namespace format {
  15. namespace {
  16. bool startsExternCBlock(const AnnotatedLine &Line) {
  17. const FormatToken *Next = Line.First->getNextNonComment();
  18. const FormatToken *NextNext = Next ? Next->getNextNonComment() : nullptr;
  19. return Line.startsWith(tok::kw_extern) && Next && Next->isStringLiteral() &&
  20. NextNext && NextNext->is(tok::l_brace);
  21. }
  22. /// \brief Tracks the indent level of \c AnnotatedLines across levels.
  23. ///
  24. /// \c nextLine must be called for each \c AnnotatedLine, after which \c
  25. /// getIndent() will return the indent for the last line \c nextLine was called
  26. /// with.
  27. /// If the line is not formatted (and thus the indent does not change), calling
  28. /// \c adjustToUnmodifiedLine after the call to \c nextLine will cause
  29. /// subsequent lines on the same level to be indented at the same level as the
  30. /// given line.
  31. class LevelIndentTracker {
  32. public:
  33. LevelIndentTracker(const FormatStyle &Style,
  34. const AdditionalKeywords &Keywords, unsigned StartLevel,
  35. int AdditionalIndent)
  36. : Style(Style), Keywords(Keywords), AdditionalIndent(AdditionalIndent) {
  37. for (unsigned i = 0; i != StartLevel; ++i)
  38. IndentForLevel.push_back(Style.IndentWidth * i + AdditionalIndent);
  39. }
  40. /// \brief Returns the indent for the current line.
  41. unsigned getIndent() const { return Indent; }
  42. /// \brief Update the indent state given that \p Line is going to be formatted
  43. /// next.
  44. void nextLine(const AnnotatedLine &Line) {
  45. Offset = getIndentOffset(*Line.First);
  46. // Update the indent level cache size so that we can rely on it
  47. // having the right size in adjustToUnmodifiedline.
  48. while (IndentForLevel.size() <= Line.Level)
  49. IndentForLevel.push_back(-1);
  50. if (Line.InPPDirective) {
  51. Indent = Line.Level * Style.IndentWidth + AdditionalIndent;
  52. } else {
  53. IndentForLevel.resize(Line.Level + 1);
  54. Indent = getIndent(IndentForLevel, Line.Level);
  55. }
  56. if (static_cast<int>(Indent) + Offset >= 0)
  57. Indent += Offset;
  58. }
  59. /// \brief Update the level indent to adapt to the given \p Line.
  60. ///
  61. /// When a line is not formatted, we move the subsequent lines on the same
  62. /// level to the same indent.
  63. /// Note that \c nextLine must have been called before this method.
  64. void adjustToUnmodifiedLine(const AnnotatedLine &Line) {
  65. unsigned LevelIndent = Line.First->OriginalColumn;
  66. if (static_cast<int>(LevelIndent) - Offset >= 0)
  67. LevelIndent -= Offset;
  68. if ((!Line.First->is(tok::comment) || IndentForLevel[Line.Level] == -1) &&
  69. !Line.InPPDirective)
  70. IndentForLevel[Line.Level] = LevelIndent;
  71. }
  72. private:
  73. /// \brief Get the offset of the line relatively to the level.
  74. ///
  75. /// For example, 'public:' labels in classes are offset by 1 or 2
  76. /// characters to the left from their level.
  77. int getIndentOffset(const FormatToken &RootToken) {
  78. if (Style.Language == FormatStyle::LK_Java ||
  79. Style.Language == FormatStyle::LK_JavaScript)
  80. return 0;
  81. if (RootToken.isAccessSpecifier(false) ||
  82. RootToken.isObjCAccessSpecifier() ||
  83. (RootToken.is(Keywords.kw_signals) && RootToken.Next &&
  84. RootToken.Next->is(tok::colon)))
  85. return Style.AccessModifierOffset;
  86. return 0;
  87. }
  88. /// \brief Get the indent of \p Level from \p IndentForLevel.
  89. ///
  90. /// \p IndentForLevel must contain the indent for the level \c l
  91. /// at \p IndentForLevel[l], or a value < 0 if the indent for
  92. /// that level is unknown.
  93. unsigned getIndent(ArrayRef<int> IndentForLevel, unsigned Level) {
  94. if (IndentForLevel[Level] != -1)
  95. return IndentForLevel[Level];
  96. if (Level == 0)
  97. return 0;
  98. return getIndent(IndentForLevel, Level - 1) + Style.IndentWidth;
  99. }
  100. const FormatStyle &Style;
  101. const AdditionalKeywords &Keywords;
  102. const unsigned AdditionalIndent;
  103. /// \brief The indent in characters for each level.
  104. std::vector<int> IndentForLevel;
  105. /// \brief Offset of the current line relative to the indent level.
  106. ///
  107. /// For example, the 'public' keywords is often indented with a negative
  108. /// offset.
  109. int Offset = 0;
  110. /// \brief The current line's indent.
  111. unsigned Indent = 0;
  112. };
  113. class LineJoiner {
  114. public:
  115. LineJoiner(const FormatStyle &Style, const AdditionalKeywords &Keywords,
  116. const SmallVectorImpl<AnnotatedLine *> &Lines)
  117. : Style(Style), Keywords(Keywords), End(Lines.end()),
  118. Next(Lines.begin()) {}
  119. /// \brief Returns the next line, merging multiple lines into one if possible.
  120. const AnnotatedLine *getNextMergedLine(bool DryRun,
  121. LevelIndentTracker &IndentTracker) {
  122. if (Next == End)
  123. return nullptr;
  124. const AnnotatedLine *Current = *Next;
  125. IndentTracker.nextLine(*Current);
  126. unsigned MergedLines =
  127. tryFitMultipleLinesInOne(IndentTracker.getIndent(), Next, End);
  128. if (MergedLines > 0 && Style.ColumnLimit == 0)
  129. // Disallow line merging if there is a break at the start of one of the
  130. // input lines.
  131. for (unsigned i = 0; i < MergedLines; ++i)
  132. if (Next[i + 1]->First->NewlinesBefore > 0)
  133. MergedLines = 0;
  134. if (!DryRun)
  135. for (unsigned i = 0; i < MergedLines; ++i)
  136. join(*Next[i], *Next[i + 1]);
  137. Next = Next + MergedLines + 1;
  138. return Current;
  139. }
  140. private:
  141. /// \brief Calculates how many lines can be merged into 1 starting at \p I.
  142. unsigned
  143. tryFitMultipleLinesInOne(unsigned Indent,
  144. SmallVectorImpl<AnnotatedLine *>::const_iterator I,
  145. SmallVectorImpl<AnnotatedLine *>::const_iterator E) {
  146. // Can't join the last line with anything.
  147. if (I + 1 == E)
  148. return 0;
  149. // We can never merge stuff if there are trailing line comments.
  150. const AnnotatedLine *TheLine = *I;
  151. if (TheLine->Last->is(TT_LineComment))
  152. return 0;
  153. if (I[1]->Type == LT_Invalid || I[1]->First->MustBreakBefore)
  154. return 0;
  155. if (TheLine->InPPDirective &&
  156. (!I[1]->InPPDirective || I[1]->First->HasUnescapedNewline))
  157. return 0;
  158. if (Style.ColumnLimit > 0 && Indent > Style.ColumnLimit)
  159. return 0;
  160. unsigned Limit =
  161. Style.ColumnLimit == 0 ? UINT_MAX : Style.ColumnLimit - Indent;
  162. // If we already exceed the column limit, we set 'Limit' to 0. The different
  163. // tryMerge..() functions can then decide whether to still do merging.
  164. Limit = TheLine->Last->TotalLength > Limit
  165. ? 0
  166. : Limit - TheLine->Last->TotalLength;
  167. // FIXME: TheLine->Level != 0 might or might not be the right check to do.
  168. // If necessary, change to something smarter.
  169. bool MergeShortFunctions =
  170. Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_All ||
  171. (Style.AllowShortFunctionsOnASingleLine >= FormatStyle::SFS_Empty &&
  172. I[1]->First->is(tok::r_brace)) ||
  173. (Style.AllowShortFunctionsOnASingleLine == FormatStyle::SFS_Inline &&
  174. TheLine->Level != 0);
  175. if (TheLine->Last->is(TT_FunctionLBrace) &&
  176. TheLine->First != TheLine->Last) {
  177. return MergeShortFunctions ? tryMergeSimpleBlock(I, E, Limit) : 0;
  178. }
  179. if (TheLine->Last->is(tok::l_brace)) {
  180. return !Style.BraceWrapping.AfterFunction
  181. ? tryMergeSimpleBlock(I, E, Limit)
  182. : 0;
  183. }
  184. if (I[1]->First->is(TT_FunctionLBrace) &&
  185. Style.BraceWrapping.AfterFunction) {
  186. if (I[1]->Last->is(TT_LineComment))
  187. return 0;
  188. // Check for Limit <= 2 to account for the " {".
  189. if (Limit <= 2 || (Style.ColumnLimit == 0 && containsMustBreak(TheLine)))
  190. return 0;
  191. Limit -= 2;
  192. unsigned MergedLines = 0;
  193. if (MergeShortFunctions) {
  194. MergedLines = tryMergeSimpleBlock(I + 1, E, Limit);
  195. // If we managed to merge the block, count the function header, which is
  196. // on a separate line.
  197. if (MergedLines > 0)
  198. ++MergedLines;
  199. }
  200. return MergedLines;
  201. }
  202. if (TheLine->First->is(tok::kw_if)) {
  203. return Style.AllowShortIfStatementsOnASingleLine
  204. ? tryMergeSimpleControlStatement(I, E, Limit)
  205. : 0;
  206. }
  207. if (TheLine->First->isOneOf(tok::kw_for, tok::kw_while)) {
  208. return Style.AllowShortLoopsOnASingleLine
  209. ? tryMergeSimpleControlStatement(I, E, Limit)
  210. : 0;
  211. }
  212. if (TheLine->First->isOneOf(tok::kw_case, tok::kw_default)) {
  213. return Style.AllowShortCaseLabelsOnASingleLine
  214. ? tryMergeShortCaseLabels(I, E, Limit)
  215. : 0;
  216. }
  217. if (TheLine->InPPDirective &&
  218. (TheLine->First->HasUnescapedNewline || TheLine->First->IsFirst)) {
  219. return tryMergeSimplePPDirective(I, E, Limit);
  220. }
  221. return 0;
  222. }
  223. unsigned
  224. tryMergeSimplePPDirective(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
  225. SmallVectorImpl<AnnotatedLine *>::const_iterator E,
  226. unsigned Limit) {
  227. if (Limit == 0)
  228. return 0;
  229. if (I + 2 != E && I[2]->InPPDirective && !I[2]->First->HasUnescapedNewline)
  230. return 0;
  231. if (1 + I[1]->Last->TotalLength > Limit)
  232. return 0;
  233. return 1;
  234. }
  235. unsigned tryMergeSimpleControlStatement(
  236. SmallVectorImpl<AnnotatedLine *>::const_iterator I,
  237. SmallVectorImpl<AnnotatedLine *>::const_iterator E, unsigned Limit) {
  238. if (Limit == 0)
  239. return 0;
  240. if (Style.BraceWrapping.AfterControlStatement &&
  241. (I[1]->First->is(tok::l_brace) && !Style.AllowShortBlocksOnASingleLine))
  242. return 0;
  243. if (I[1]->InPPDirective != (*I)->InPPDirective ||
  244. (I[1]->InPPDirective && I[1]->First->HasUnescapedNewline))
  245. return 0;
  246. Limit = limitConsideringMacros(I + 1, E, Limit);
  247. AnnotatedLine &Line = **I;
  248. if (Line.Last->isNot(tok::r_paren))
  249. return 0;
  250. if (1 + I[1]->Last->TotalLength > Limit)
  251. return 0;
  252. if (I[1]->First->isOneOf(tok::semi, tok::kw_if, tok::kw_for, tok::kw_while,
  253. TT_LineComment))
  254. return 0;
  255. // Only inline simple if's (no nested if or else).
  256. if (I + 2 != E && Line.startsWith(tok::kw_if) &&
  257. I[2]->First->is(tok::kw_else))
  258. return 0;
  259. return 1;
  260. }
  261. unsigned
  262. tryMergeShortCaseLabels(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
  263. SmallVectorImpl<AnnotatedLine *>::const_iterator E,
  264. unsigned Limit) {
  265. if (Limit == 0 || I + 1 == E ||
  266. I[1]->First->isOneOf(tok::kw_case, tok::kw_default))
  267. return 0;
  268. unsigned NumStmts = 0;
  269. unsigned Length = 0;
  270. bool InPPDirective = I[0]->InPPDirective;
  271. for (; NumStmts < 3; ++NumStmts) {
  272. if (I + 1 + NumStmts == E)
  273. break;
  274. const AnnotatedLine *Line = I[1 + NumStmts];
  275. if (Line->InPPDirective != InPPDirective)
  276. break;
  277. if (Line->First->isOneOf(tok::kw_case, tok::kw_default, tok::r_brace))
  278. break;
  279. if (Line->First->isOneOf(tok::kw_if, tok::kw_for, tok::kw_switch,
  280. tok::kw_while, tok::comment) ||
  281. Line->Last->is(tok::comment))
  282. return 0;
  283. Length += I[1 + NumStmts]->Last->TotalLength + 1; // 1 for the space.
  284. }
  285. if (NumStmts == 0 || NumStmts == 3 || Length > Limit)
  286. return 0;
  287. return NumStmts;
  288. }
  289. unsigned
  290. tryMergeSimpleBlock(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
  291. SmallVectorImpl<AnnotatedLine *>::const_iterator E,
  292. unsigned Limit) {
  293. AnnotatedLine &Line = **I;
  294. // Don't merge ObjC @ keywords and methods.
  295. // FIXME: If an option to allow short exception handling clauses on a single
  296. // line is added, change this to not return for @try and friends.
  297. if (Style.Language != FormatStyle::LK_Java &&
  298. Line.First->isOneOf(tok::at, tok::minus, tok::plus))
  299. return 0;
  300. // Check that the current line allows merging. This depends on whether we
  301. // are in a control flow statements as well as several style flags.
  302. if (Line.First->isOneOf(tok::kw_else, tok::kw_case) ||
  303. (Line.First->Next && Line.First->Next->is(tok::kw_else)))
  304. return 0;
  305. if (Line.First->isOneOf(tok::kw_if, tok::kw_while, tok::kw_do, tok::kw_try,
  306. tok::kw___try, tok::kw_catch, tok::kw___finally,
  307. tok::kw_for, tok::r_brace, Keywords.kw___except)) {
  308. if (!Style.AllowShortBlocksOnASingleLine)
  309. return 0;
  310. if (!Style.AllowShortIfStatementsOnASingleLine &&
  311. Line.startsWith(tok::kw_if))
  312. return 0;
  313. if (!Style.AllowShortLoopsOnASingleLine &&
  314. Line.First->isOneOf(tok::kw_while, tok::kw_do, tok::kw_for))
  315. return 0;
  316. // FIXME: Consider an option to allow short exception handling clauses on
  317. // a single line.
  318. // FIXME: This isn't covered by tests.
  319. // FIXME: For catch, __except, __finally the first token on the line
  320. // is '}', so this isn't correct here.
  321. if (Line.First->isOneOf(tok::kw_try, tok::kw___try, tok::kw_catch,
  322. Keywords.kw___except, tok::kw___finally))
  323. return 0;
  324. }
  325. FormatToken *Tok = I[1]->First;
  326. if (Tok->is(tok::r_brace) && !Tok->MustBreakBefore &&
  327. (Tok->getNextNonComment() == nullptr ||
  328. Tok->getNextNonComment()->is(tok::semi))) {
  329. // We merge empty blocks even if the line exceeds the column limit.
  330. Tok->SpacesRequiredBefore = 0;
  331. Tok->CanBreakBefore = true;
  332. return 1;
  333. } else if (Limit != 0 && !Line.startsWith(tok::kw_namespace) &&
  334. !startsExternCBlock(Line)) {
  335. // We don't merge short records.
  336. if (Line.First->isOneOf(tok::kw_class, tok::kw_union, tok::kw_struct,
  337. Keywords.kw_interface))
  338. return 0;
  339. // Check that we still have three lines and they fit into the limit.
  340. if (I + 2 == E || I[2]->Type == LT_Invalid)
  341. return 0;
  342. Limit = limitConsideringMacros(I + 2, E, Limit);
  343. if (!nextTwoLinesFitInto(I, Limit))
  344. return 0;
  345. // Second, check that the next line does not contain any braces - if it
  346. // does, readability declines when putting it into a single line.
  347. if (I[1]->Last->is(TT_LineComment))
  348. return 0;
  349. do {
  350. if (Tok->is(tok::l_brace) && Tok->BlockKind != BK_BracedInit)
  351. return 0;
  352. Tok = Tok->Next;
  353. } while (Tok);
  354. // Last, check that the third line starts with a closing brace.
  355. Tok = I[2]->First;
  356. if (Tok->isNot(tok::r_brace))
  357. return 0;
  358. // Don't merge "if (a) { .. } else {".
  359. if (Tok->Next && Tok->Next->is(tok::kw_else))
  360. return 0;
  361. return 2;
  362. }
  363. return 0;
  364. }
  365. /// Returns the modified column limit for \p I if it is inside a macro and
  366. /// needs a trailing '\'.
  367. unsigned
  368. limitConsideringMacros(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
  369. SmallVectorImpl<AnnotatedLine *>::const_iterator E,
  370. unsigned Limit) {
  371. if (I[0]->InPPDirective && I + 1 != E &&
  372. !I[1]->First->HasUnescapedNewline && !I[1]->First->is(tok::eof)) {
  373. return Limit < 2 ? 0 : Limit - 2;
  374. }
  375. return Limit;
  376. }
  377. bool nextTwoLinesFitInto(SmallVectorImpl<AnnotatedLine *>::const_iterator I,
  378. unsigned Limit) {
  379. if (I[1]->First->MustBreakBefore || I[2]->First->MustBreakBefore)
  380. return false;
  381. return 1 + I[1]->Last->TotalLength + 1 + I[2]->Last->TotalLength <= Limit;
  382. }
  383. bool containsMustBreak(const AnnotatedLine *Line) {
  384. for (const FormatToken *Tok = Line->First; Tok; Tok = Tok->Next) {
  385. if (Tok->MustBreakBefore)
  386. return true;
  387. }
  388. return false;
  389. }
  390. void join(AnnotatedLine &A, const AnnotatedLine &B) {
  391. assert(!A.Last->Next);
  392. assert(!B.First->Previous);
  393. if (B.Affected)
  394. A.Affected = true;
  395. A.Last->Next = B.First;
  396. B.First->Previous = A.Last;
  397. B.First->CanBreakBefore = true;
  398. unsigned LengthA = A.Last->TotalLength + B.First->SpacesRequiredBefore;
  399. for (FormatToken *Tok = B.First; Tok; Tok = Tok->Next) {
  400. Tok->TotalLength += LengthA;
  401. A.Last = Tok;
  402. }
  403. }
  404. const FormatStyle &Style;
  405. const AdditionalKeywords &Keywords;
  406. const SmallVectorImpl<AnnotatedLine *>::const_iterator End;
  407. SmallVectorImpl<AnnotatedLine *>::const_iterator Next;
  408. };
  409. static void markFinalized(FormatToken *Tok) {
  410. for (; Tok; Tok = Tok->Next) {
  411. Tok->Finalized = true;
  412. for (AnnotatedLine *Child : Tok->Children)
  413. markFinalized(Child->First);
  414. }
  415. }
  416. #ifndef NDEBUG
  417. static void printLineState(const LineState &State) {
  418. llvm::dbgs() << "State: ";
  419. for (const ParenState &P : State.Stack) {
  420. llvm::dbgs() << P.Indent << "|" << P.LastSpace << "|" << P.NestedBlockIndent
  421. << " ";
  422. }
  423. llvm::dbgs() << State.NextToken->TokenText << "\n";
  424. }
  425. #endif
  426. /// \brief Base class for classes that format one \c AnnotatedLine.
  427. class LineFormatter {
  428. public:
  429. LineFormatter(ContinuationIndenter *Indenter, WhitespaceManager *Whitespaces,
  430. const FormatStyle &Style,
  431. UnwrappedLineFormatter *BlockFormatter)
  432. : Indenter(Indenter), Whitespaces(Whitespaces), Style(Style),
  433. BlockFormatter(BlockFormatter) {}
  434. virtual ~LineFormatter() {}
  435. /// \brief Formats an \c AnnotatedLine and returns the penalty.
  436. ///
  437. /// If \p DryRun is \c false, directly applies the changes.
  438. virtual unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
  439. bool DryRun) = 0;
  440. protected:
  441. /// \brief If the \p State's next token is an r_brace closing a nested block,
  442. /// format the nested block before it.
  443. ///
  444. /// Returns \c true if all children could be placed successfully and adapts
  445. /// \p Penalty as well as \p State. If \p DryRun is false, also directly
  446. /// creates changes using \c Whitespaces.
  447. ///
  448. /// The crucial idea here is that children always get formatted upon
  449. /// encountering the closing brace right after the nested block. Now, if we
  450. /// are currently trying to keep the "}" on the same line (i.e. \p NewLine is
  451. /// \c false), the entire block has to be kept on the same line (which is only
  452. /// possible if it fits on the line, only contains a single statement, etc.
  453. ///
  454. /// If \p NewLine is true, we format the nested block on separate lines, i.e.
  455. /// break after the "{", format all lines with correct indentation and the put
  456. /// the closing "}" on yet another new line.
  457. ///
  458. /// This enables us to keep the simple structure of the
  459. /// \c UnwrappedLineFormatter, where we only have two options for each token:
  460. /// break or don't break.
  461. bool formatChildren(LineState &State, bool NewLine, bool DryRun,
  462. unsigned &Penalty) {
  463. const FormatToken *LBrace = State.NextToken->getPreviousNonComment();
  464. FormatToken &Previous = *State.NextToken->Previous;
  465. if (!LBrace || LBrace->isNot(tok::l_brace) ||
  466. LBrace->BlockKind != BK_Block || Previous.Children.size() == 0)
  467. // The previous token does not open a block. Nothing to do. We don't
  468. // assert so that we can simply call this function for all tokens.
  469. return true;
  470. if (NewLine) {
  471. int AdditionalIndent = State.Stack.back().Indent -
  472. Previous.Children[0]->Level * Style.IndentWidth;
  473. Penalty +=
  474. BlockFormatter->format(Previous.Children, DryRun, AdditionalIndent,
  475. /*FixBadIndentation=*/true);
  476. return true;
  477. }
  478. if (Previous.Children[0]->First->MustBreakBefore)
  479. return false;
  480. // Cannot merge multiple statements into a single line.
  481. if (Previous.Children.size() > 1)
  482. return false;
  483. // Cannot merge into one line if this line ends on a comment.
  484. if (Previous.is(tok::comment))
  485. return false;
  486. // We can't put the closing "}" on a line with a trailing comment.
  487. if (Previous.Children[0]->Last->isTrailingComment())
  488. return false;
  489. // If the child line exceeds the column limit, we wouldn't want to merge it.
  490. // We add +2 for the trailing " }".
  491. if (Style.ColumnLimit > 0 &&
  492. Previous.Children[0]->Last->TotalLength + State.Column + 2 >
  493. Style.ColumnLimit)
  494. return false;
  495. if (!DryRun) {
  496. Whitespaces->replaceWhitespace(
  497. *Previous.Children[0]->First,
  498. /*Newlines=*/0, /*IndentLevel=*/0, /*Spaces=*/1,
  499. /*StartOfTokenColumn=*/State.Column, State.Line->InPPDirective);
  500. }
  501. Penalty += formatLine(*Previous.Children[0], State.Column + 1, DryRun);
  502. State.Column += 1 + Previous.Children[0]->Last->TotalLength;
  503. return true;
  504. }
  505. ContinuationIndenter *Indenter;
  506. private:
  507. WhitespaceManager *Whitespaces;
  508. const FormatStyle &Style;
  509. UnwrappedLineFormatter *BlockFormatter;
  510. };
  511. /// \brief Formatter that keeps the existing line breaks.
  512. class NoColumnLimitLineFormatter : public LineFormatter {
  513. public:
  514. NoColumnLimitLineFormatter(ContinuationIndenter *Indenter,
  515. WhitespaceManager *Whitespaces,
  516. const FormatStyle &Style,
  517. UnwrappedLineFormatter *BlockFormatter)
  518. : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
  519. /// \brief Formats the line, simply keeping all of the input's line breaking
  520. /// decisions.
  521. unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
  522. bool DryRun) override {
  523. assert(!DryRun);
  524. LineState State =
  525. Indenter->getInitialState(FirstIndent, &Line, /*DryRun=*/false);
  526. while (State.NextToken) {
  527. bool Newline =
  528. Indenter->mustBreak(State) ||
  529. (Indenter->canBreak(State) && State.NextToken->NewlinesBefore > 0);
  530. unsigned Penalty = 0;
  531. formatChildren(State, Newline, /*DryRun=*/false, Penalty);
  532. Indenter->addTokenToState(State, Newline, /*DryRun=*/false);
  533. }
  534. return 0;
  535. }
  536. };
  537. /// \brief Formatter that puts all tokens into a single line without breaks.
  538. class NoLineBreakFormatter : public LineFormatter {
  539. public:
  540. NoLineBreakFormatter(ContinuationIndenter *Indenter,
  541. WhitespaceManager *Whitespaces, const FormatStyle &Style,
  542. UnwrappedLineFormatter *BlockFormatter)
  543. : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
  544. /// \brief Puts all tokens into a single line.
  545. unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
  546. bool DryRun) override {
  547. unsigned Penalty = 0;
  548. LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
  549. while (State.NextToken) {
  550. formatChildren(State, /*Newline=*/false, DryRun, Penalty);
  551. Indenter->addTokenToState(State, /*Newline=*/false, DryRun);
  552. }
  553. return Penalty;
  554. }
  555. };
  556. /// \brief Finds the best way to break lines.
  557. class OptimizingLineFormatter : public LineFormatter {
  558. public:
  559. OptimizingLineFormatter(ContinuationIndenter *Indenter,
  560. WhitespaceManager *Whitespaces,
  561. const FormatStyle &Style,
  562. UnwrappedLineFormatter *BlockFormatter)
  563. : LineFormatter(Indenter, Whitespaces, Style, BlockFormatter) {}
  564. /// \brief Formats the line by finding the best line breaks with line lengths
  565. /// below the column limit.
  566. unsigned formatLine(const AnnotatedLine &Line, unsigned FirstIndent,
  567. bool DryRun) override {
  568. LineState State = Indenter->getInitialState(FirstIndent, &Line, DryRun);
  569. // If the ObjC method declaration does not fit on a line, we should format
  570. // it with one arg per line.
  571. if (State.Line->Type == LT_ObjCMethodDecl)
  572. State.Stack.back().BreakBeforeParameter = true;
  573. // Find best solution in solution space.
  574. return analyzeSolutionSpace(State, DryRun);
  575. }
  576. private:
  577. struct CompareLineStatePointers {
  578. bool operator()(LineState *obj1, LineState *obj2) const {
  579. return *obj1 < *obj2;
  580. }
  581. };
  582. /// \brief A pair of <penalty, count> that is used to prioritize the BFS on.
  583. ///
  584. /// In case of equal penalties, we want to prefer states that were inserted
  585. /// first. During state generation we make sure that we insert states first
  586. /// that break the line as late as possible.
  587. typedef std::pair<unsigned, unsigned> OrderedPenalty;
  588. /// \brief An edge in the solution space from \c Previous->State to \c State,
  589. /// inserting a newline dependent on the \c NewLine.
  590. struct StateNode {
  591. StateNode(const LineState &State, bool NewLine, StateNode *Previous)
  592. : State(State), NewLine(NewLine), Previous(Previous) {}
  593. LineState State;
  594. bool NewLine;
  595. StateNode *Previous;
  596. };
  597. /// \brief An item in the prioritized BFS search queue. The \c StateNode's
  598. /// \c State has the given \c OrderedPenalty.
  599. typedef std::pair<OrderedPenalty, StateNode *> QueueItem;
  600. /// \brief The BFS queue type.
  601. typedef std::priority_queue<QueueItem, std::vector<QueueItem>,
  602. std::greater<QueueItem>> QueueType;
  603. /// \brief Analyze the entire solution space starting from \p InitialState.
  604. ///
  605. /// This implements a variant of Dijkstra's algorithm on the graph that spans
  606. /// the solution space (\c LineStates are the nodes). The algorithm tries to
  607. /// find the shortest path (the one with lowest penalty) from \p InitialState
  608. /// to a state where all tokens are placed. Returns the penalty.
  609. ///
  610. /// If \p DryRun is \c false, directly applies the changes.
  611. unsigned analyzeSolutionSpace(LineState &InitialState, bool DryRun) {
  612. std::set<LineState *, CompareLineStatePointers> Seen;
  613. // Increasing count of \c StateNode items we have created. This is used to
  614. // create a deterministic order independent of the container.
  615. unsigned Count = 0;
  616. QueueType Queue;
  617. // Insert start element into queue.
  618. StateNode *Node =
  619. new (Allocator.Allocate()) StateNode(InitialState, false, nullptr);
  620. Queue.push(QueueItem(OrderedPenalty(0, Count), Node));
  621. ++Count;
  622. unsigned Penalty = 0;
  623. // While not empty, take first element and follow edges.
  624. while (!Queue.empty()) {
  625. Penalty = Queue.top().first.first;
  626. StateNode *Node = Queue.top().second;
  627. if (!Node->State.NextToken) {
  628. DEBUG(llvm::dbgs() << "\n---\nPenalty for line: " << Penalty << "\n");
  629. break;
  630. }
  631. Queue.pop();
  632. // Cut off the analysis of certain solutions if the analysis gets too
  633. // complex. See description of IgnoreStackForComparison.
  634. if (Count > 10000)
  635. Node->State.IgnoreStackForComparison = true;
  636. if (!Seen.insert(&Node->State).second)
  637. // State already examined with lower penalty.
  638. continue;
  639. FormatDecision LastFormat = Node->State.NextToken->Decision;
  640. if (LastFormat == FD_Unformatted || LastFormat == FD_Continue)
  641. addNextStateToQueue(Penalty, Node, /*NewLine=*/false, &Count, &Queue);
  642. if (LastFormat == FD_Unformatted || LastFormat == FD_Break)
  643. addNextStateToQueue(Penalty, Node, /*NewLine=*/true, &Count, &Queue);
  644. }
  645. if (Queue.empty()) {
  646. // We were unable to find a solution, do nothing.
  647. // FIXME: Add diagnostic?
  648. DEBUG(llvm::dbgs() << "Could not find a solution.\n");
  649. return 0;
  650. }
  651. // Reconstruct the solution.
  652. if (!DryRun)
  653. reconstructPath(InitialState, Queue.top().second);
  654. DEBUG(llvm::dbgs() << "Total number of analyzed states: " << Count << "\n");
  655. DEBUG(llvm::dbgs() << "---\n");
  656. return Penalty;
  657. }
  658. /// \brief Add the following state to the analysis queue \c Queue.
  659. ///
  660. /// Assume the current state is \p PreviousNode and has been reached with a
  661. /// penalty of \p Penalty. Insert a line break if \p NewLine is \c true.
  662. void addNextStateToQueue(unsigned Penalty, StateNode *PreviousNode,
  663. bool NewLine, unsigned *Count, QueueType *Queue) {
  664. if (NewLine && !Indenter->canBreak(PreviousNode->State))
  665. return;
  666. if (!NewLine && Indenter->mustBreak(PreviousNode->State))
  667. return;
  668. StateNode *Node = new (Allocator.Allocate())
  669. StateNode(PreviousNode->State, NewLine, PreviousNode);
  670. if (!formatChildren(Node->State, NewLine, /*DryRun=*/true, Penalty))
  671. return;
  672. Penalty += Indenter->addTokenToState(Node->State, NewLine, true);
  673. Queue->push(QueueItem(OrderedPenalty(Penalty, *Count), Node));
  674. ++(*Count);
  675. }
  676. /// \brief Applies the best formatting by reconstructing the path in the
  677. /// solution space that leads to \c Best.
  678. void reconstructPath(LineState &State, StateNode *Best) {
  679. std::deque<StateNode *> Path;
  680. // We do not need a break before the initial token.
  681. while (Best->Previous) {
  682. Path.push_front(Best);
  683. Best = Best->Previous;
  684. }
  685. for (std::deque<StateNode *>::iterator I = Path.begin(), E = Path.end();
  686. I != E; ++I) {
  687. unsigned Penalty = 0;
  688. formatChildren(State, (*I)->NewLine, /*DryRun=*/false, Penalty);
  689. Penalty += Indenter->addTokenToState(State, (*I)->NewLine, false);
  690. DEBUG({
  691. printLineState((*I)->Previous->State);
  692. if ((*I)->NewLine) {
  693. llvm::dbgs() << "Penalty for placing "
  694. << (*I)->Previous->State.NextToken->Tok.getName() << ": "
  695. << Penalty << "\n";
  696. }
  697. });
  698. }
  699. }
  700. llvm::SpecificBumpPtrAllocator<StateNode> Allocator;
  701. };
  702. } // anonymous namespace
  703. unsigned
  704. UnwrappedLineFormatter::format(const SmallVectorImpl<AnnotatedLine *> &Lines,
  705. bool DryRun, int AdditionalIndent,
  706. bool FixBadIndentation) {
  707. LineJoiner Joiner(Style, Keywords, Lines);
  708. // Try to look up already computed penalty in DryRun-mode.
  709. std::pair<const SmallVectorImpl<AnnotatedLine *> *, unsigned> CacheKey(
  710. &Lines, AdditionalIndent);
  711. auto CacheIt = PenaltyCache.find(CacheKey);
  712. if (DryRun && CacheIt != PenaltyCache.end())
  713. return CacheIt->second;
  714. assert(!Lines.empty());
  715. unsigned Penalty = 0;
  716. LevelIndentTracker IndentTracker(Style, Keywords, Lines[0]->Level,
  717. AdditionalIndent);
  718. const AnnotatedLine *PreviousLine = nullptr;
  719. const AnnotatedLine *NextLine = nullptr;
  720. for (const AnnotatedLine *Line =
  721. Joiner.getNextMergedLine(DryRun, IndentTracker);
  722. Line; Line = NextLine) {
  723. const AnnotatedLine &TheLine = *Line;
  724. unsigned Indent = IndentTracker.getIndent();
  725. bool FixIndentation =
  726. FixBadIndentation && (Indent != TheLine.First->OriginalColumn);
  727. bool ShouldFormat = TheLine.Affected || FixIndentation;
  728. // We cannot format this line; if the reason is that the line had a
  729. // parsing error, remember that.
  730. if (ShouldFormat && TheLine.Type == LT_Invalid && IncompleteFormat)
  731. *IncompleteFormat = true;
  732. if (ShouldFormat && TheLine.Type != LT_Invalid) {
  733. if (!DryRun)
  734. formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level, Indent,
  735. TheLine.InPPDirective);
  736. NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
  737. unsigned ColumnLimit = getColumnLimit(TheLine.InPPDirective, NextLine);
  738. bool FitsIntoOneLine =
  739. TheLine.Last->TotalLength + Indent <= ColumnLimit ||
  740. TheLine.Type == LT_ImportStatement;
  741. if (Style.ColumnLimit == 0)
  742. NoColumnLimitLineFormatter(Indenter, Whitespaces, Style, this)
  743. .formatLine(TheLine, Indent, DryRun);
  744. else if (FitsIntoOneLine)
  745. Penalty += NoLineBreakFormatter(Indenter, Whitespaces, Style, this)
  746. .formatLine(TheLine, Indent, DryRun);
  747. else
  748. Penalty += OptimizingLineFormatter(Indenter, Whitespaces, Style, this)
  749. .formatLine(TheLine, Indent, DryRun);
  750. } else {
  751. // If no token in the current line is affected, we still need to format
  752. // affected children.
  753. if (TheLine.ChildrenAffected)
  754. format(TheLine.Children, DryRun);
  755. // Adapt following lines on the current indent level to the same level
  756. // unless the current \c AnnotatedLine is not at the beginning of a line.
  757. bool StartsNewLine =
  758. TheLine.First->NewlinesBefore > 0 || TheLine.First->IsFirst;
  759. if (StartsNewLine)
  760. IndentTracker.adjustToUnmodifiedLine(TheLine);
  761. if (!DryRun) {
  762. bool ReformatLeadingWhitespace =
  763. StartsNewLine && ((PreviousLine && PreviousLine->Affected) ||
  764. TheLine.LeadingEmptyLinesAffected);
  765. // Format the first token.
  766. if (ReformatLeadingWhitespace)
  767. formatFirstToken(*TheLine.First, PreviousLine, TheLine.Level,
  768. TheLine.First->OriginalColumn,
  769. TheLine.InPPDirective);
  770. else
  771. Whitespaces->addUntouchableToken(*TheLine.First,
  772. TheLine.InPPDirective);
  773. // Notify the WhitespaceManager about the unchanged whitespace.
  774. for (FormatToken *Tok = TheLine.First->Next; Tok; Tok = Tok->Next)
  775. Whitespaces->addUntouchableToken(*Tok, TheLine.InPPDirective);
  776. }
  777. NextLine = Joiner.getNextMergedLine(DryRun, IndentTracker);
  778. }
  779. if (!DryRun)
  780. markFinalized(TheLine.First);
  781. PreviousLine = &TheLine;
  782. }
  783. PenaltyCache[CacheKey] = Penalty;
  784. return Penalty;
  785. }
  786. void UnwrappedLineFormatter::formatFirstToken(FormatToken &RootToken,
  787. const AnnotatedLine *PreviousLine,
  788. unsigned IndentLevel,
  789. unsigned Indent,
  790. bool InPPDirective) {
  791. if (RootToken.is(tok::eof)) {
  792. unsigned Newlines = std::min(RootToken.NewlinesBefore, 1u);
  793. Whitespaces->replaceWhitespace(RootToken, Newlines, /*IndentLevel=*/0,
  794. /*Spaces=*/0, /*TargetColumn=*/0);
  795. return;
  796. }
  797. unsigned Newlines =
  798. std::min(RootToken.NewlinesBefore, Style.MaxEmptyLinesToKeep + 1);
  799. // Remove empty lines before "}" where applicable.
  800. if (RootToken.is(tok::r_brace) &&
  801. (!RootToken.Next ||
  802. (RootToken.Next->is(tok::semi) && !RootToken.Next->Next)))
  803. Newlines = std::min(Newlines, 1u);
  804. if (Newlines == 0 && !RootToken.IsFirst)
  805. Newlines = 1;
  806. if (RootToken.IsFirst && !RootToken.HasUnescapedNewline)
  807. Newlines = 0;
  808. // Remove empty lines after "{".
  809. if (!Style.KeepEmptyLinesAtTheStartOfBlocks && PreviousLine &&
  810. PreviousLine->Last->is(tok::l_brace) &&
  811. PreviousLine->First->isNot(tok::kw_namespace) &&
  812. !startsExternCBlock(*PreviousLine))
  813. Newlines = 1;
  814. // Insert extra new line before access specifiers.
  815. if (PreviousLine && PreviousLine->Last->isOneOf(tok::semi, tok::r_brace) &&
  816. RootToken.isAccessSpecifier() && RootToken.NewlinesBefore == 1)
  817. ++Newlines;
  818. // Remove empty lines after access specifiers.
  819. if (PreviousLine && PreviousLine->First->isAccessSpecifier() &&
  820. (!PreviousLine->InPPDirective || !RootToken.HasUnescapedNewline))
  821. Newlines = std::min(1u, Newlines);
  822. Whitespaces->replaceWhitespace(RootToken, Newlines, IndentLevel, Indent,
  823. Indent, InPPDirective &&
  824. !RootToken.HasUnescapedNewline);
  825. }
  826. unsigned
  827. UnwrappedLineFormatter::getColumnLimit(bool InPPDirective,
  828. const AnnotatedLine *NextLine) const {
  829. // In preprocessor directives reserve two chars for trailing " \" if the
  830. // next line continues the preprocessor directive.
  831. bool ContinuesPPDirective =
  832. InPPDirective &&
  833. // If there is no next line, this is likely a child line and the parent
  834. // continues the preprocessor directive.
  835. (!NextLine ||
  836. (NextLine->InPPDirective &&
  837. // If there is an unescaped newline between this line and the next, the
  838. // next line starts a new preprocessor directive.
  839. !NextLine->First->HasUnescapedNewline));
  840. return Style.ColumnLimit - (ContinuesPPDirective ? 2 : 0);
  841. }
  842. } // namespace format
  843. } // namespace clang