BreakableToken.cpp 41 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962
  1. //===--- BreakableToken.cpp - Format C++ code -----------------------------===//
  2. //
  3. // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
  4. // See https://llvm.org/LICENSE.txt for license information.
  5. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
  6. //
  7. //===----------------------------------------------------------------------===//
  8. ///
  9. /// \file
  10. /// Contains implementation of BreakableToken class and classes derived
  11. /// from it.
  12. ///
  13. //===----------------------------------------------------------------------===//
  14. #include "BreakableToken.h"
  15. #include "ContinuationIndenter.h"
  16. #include "clang/Basic/CharInfo.h"
  17. #include "clang/Format/Format.h"
  18. #include "llvm/ADT/STLExtras.h"
  19. #include "llvm/Support/Debug.h"
  20. #include <algorithm>
  21. #define DEBUG_TYPE "format-token-breaker"
  22. namespace clang {
  23. namespace format {
  24. static const char *const Blanks = " \t\v\f\r";
  25. static bool IsBlank(char C) {
  26. switch (C) {
  27. case ' ':
  28. case '\t':
  29. case '\v':
  30. case '\f':
  31. case '\r':
  32. return true;
  33. default:
  34. return false;
  35. }
  36. }
  37. static StringRef getLineCommentIndentPrefix(StringRef Comment,
  38. const FormatStyle &Style) {
  39. static const char *const KnownCStylePrefixes[] = {"///<", "//!<", "///", "//",
  40. "//!"};
  41. static const char *const KnownTextProtoPrefixes[] = {"//", "#", "##", "###",
  42. "####"};
  43. ArrayRef<const char *> KnownPrefixes(KnownCStylePrefixes);
  44. if (Style.Language == FormatStyle::LK_TextProto)
  45. KnownPrefixes = KnownTextProtoPrefixes;
  46. StringRef LongestPrefix;
  47. for (StringRef KnownPrefix : KnownPrefixes) {
  48. if (Comment.startswith(KnownPrefix)) {
  49. size_t PrefixLength = KnownPrefix.size();
  50. while (PrefixLength < Comment.size() && Comment[PrefixLength] == ' ')
  51. ++PrefixLength;
  52. if (PrefixLength > LongestPrefix.size())
  53. LongestPrefix = Comment.substr(0, PrefixLength);
  54. }
  55. }
  56. return LongestPrefix;
  57. }
  58. static BreakableToken::Split
  59. getCommentSplit(StringRef Text, unsigned ContentStartColumn,
  60. unsigned ColumnLimit, unsigned TabWidth,
  61. encoding::Encoding Encoding, const FormatStyle &Style) {
  62. LLVM_DEBUG(llvm::dbgs() << "Comment split: \"" << Text
  63. << "\", Column limit: " << ColumnLimit
  64. << ", Content start: " << ContentStartColumn << "\n");
  65. if (ColumnLimit <= ContentStartColumn + 1)
  66. return BreakableToken::Split(StringRef::npos, 0);
  67. unsigned MaxSplit = ColumnLimit - ContentStartColumn + 1;
  68. unsigned MaxSplitBytes = 0;
  69. for (unsigned NumChars = 0;
  70. NumChars < MaxSplit && MaxSplitBytes < Text.size();) {
  71. unsigned BytesInChar =
  72. encoding::getCodePointNumBytes(Text[MaxSplitBytes], Encoding);
  73. NumChars +=
  74. encoding::columnWidthWithTabs(Text.substr(MaxSplitBytes, BytesInChar),
  75. ContentStartColumn, TabWidth, Encoding);
  76. MaxSplitBytes += BytesInChar;
  77. }
  78. StringRef::size_type SpaceOffset = Text.find_last_of(Blanks, MaxSplitBytes);
  79. static auto *const kNumberedListRegexp = new llvm::Regex("^[1-9][0-9]?\\.");
  80. while (SpaceOffset != StringRef::npos) {
  81. // Do not split before a number followed by a dot: this would be interpreted
  82. // as a numbered list, which would prevent re-flowing in subsequent passes.
  83. if (kNumberedListRegexp->match(Text.substr(SpaceOffset).ltrim(Blanks)))
  84. SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
  85. // In JavaScript, some @tags can be followed by {, and machinery that parses
  86. // these comments will fail to understand the comment if followed by a line
  87. // break. So avoid ever breaking before a {.
  88. else if (Style.Language == FormatStyle::LK_JavaScript &&
  89. SpaceOffset + 1 < Text.size() && Text[SpaceOffset + 1] == '{')
  90. SpaceOffset = Text.find_last_of(Blanks, SpaceOffset);
  91. else
  92. break;
  93. }
  94. if (SpaceOffset == StringRef::npos ||
  95. // Don't break at leading whitespace.
  96. Text.find_last_not_of(Blanks, SpaceOffset) == StringRef::npos) {
  97. // Make sure that we don't break at leading whitespace that
  98. // reaches past MaxSplit.
  99. StringRef::size_type FirstNonWhitespace = Text.find_first_not_of(Blanks);
  100. if (FirstNonWhitespace == StringRef::npos)
  101. // If the comment is only whitespace, we cannot split.
  102. return BreakableToken::Split(StringRef::npos, 0);
  103. SpaceOffset = Text.find_first_of(
  104. Blanks, std::max<unsigned>(MaxSplitBytes, FirstNonWhitespace));
  105. }
  106. if (SpaceOffset != StringRef::npos && SpaceOffset != 0) {
  107. // adaptStartOfLine will break after lines starting with /** if the comment
  108. // is broken anywhere. Avoid emitting this break twice here.
  109. // Example: in /** longtextcomesherethatbreaks */ (with ColumnLimit 20) will
  110. // insert a break after /**, so this code must not insert the same break.
  111. if (SpaceOffset == 1 && Text[SpaceOffset - 1] == '*')
  112. return BreakableToken::Split(StringRef::npos, 0);
  113. StringRef BeforeCut = Text.substr(0, SpaceOffset).rtrim(Blanks);
  114. StringRef AfterCut = Text.substr(SpaceOffset).ltrim(Blanks);
  115. return BreakableToken::Split(BeforeCut.size(),
  116. AfterCut.begin() - BeforeCut.end());
  117. }
  118. return BreakableToken::Split(StringRef::npos, 0);
  119. }
  120. static BreakableToken::Split
  121. getStringSplit(StringRef Text, unsigned UsedColumns, unsigned ColumnLimit,
  122. unsigned TabWidth, encoding::Encoding Encoding) {
  123. // FIXME: Reduce unit test case.
  124. if (Text.empty())
  125. return BreakableToken::Split(StringRef::npos, 0);
  126. if (ColumnLimit <= UsedColumns)
  127. return BreakableToken::Split(StringRef::npos, 0);
  128. unsigned MaxSplit = ColumnLimit - UsedColumns;
  129. StringRef::size_type SpaceOffset = 0;
  130. StringRef::size_type SlashOffset = 0;
  131. StringRef::size_type WordStartOffset = 0;
  132. StringRef::size_type SplitPoint = 0;
  133. for (unsigned Chars = 0;;) {
  134. unsigned Advance;
  135. if (Text[0] == '\\') {
  136. Advance = encoding::getEscapeSequenceLength(Text);
  137. Chars += Advance;
  138. } else {
  139. Advance = encoding::getCodePointNumBytes(Text[0], Encoding);
  140. Chars += encoding::columnWidthWithTabs(
  141. Text.substr(0, Advance), UsedColumns + Chars, TabWidth, Encoding);
  142. }
  143. if (Chars > MaxSplit || Text.size() <= Advance)
  144. break;
  145. if (IsBlank(Text[0]))
  146. SpaceOffset = SplitPoint;
  147. if (Text[0] == '/')
  148. SlashOffset = SplitPoint;
  149. if (Advance == 1 && !isAlphanumeric(Text[0]))
  150. WordStartOffset = SplitPoint;
  151. SplitPoint += Advance;
  152. Text = Text.substr(Advance);
  153. }
  154. if (SpaceOffset != 0)
  155. return BreakableToken::Split(SpaceOffset + 1, 0);
  156. if (SlashOffset != 0)
  157. return BreakableToken::Split(SlashOffset + 1, 0);
  158. if (WordStartOffset != 0)
  159. return BreakableToken::Split(WordStartOffset + 1, 0);
  160. if (SplitPoint != 0)
  161. return BreakableToken::Split(SplitPoint, 0);
  162. return BreakableToken::Split(StringRef::npos, 0);
  163. }
  164. bool switchesFormatting(const FormatToken &Token) {
  165. assert((Token.is(TT_BlockComment) || Token.is(TT_LineComment)) &&
  166. "formatting regions are switched by comment tokens");
  167. StringRef Content = Token.TokenText.substr(2).ltrim();
  168. return Content.startswith("clang-format on") ||
  169. Content.startswith("clang-format off");
  170. }
  171. unsigned
  172. BreakableToken::getLengthAfterCompression(unsigned RemainingTokenColumns,
  173. Split Split) const {
  174. // Example: consider the content
  175. // lala lala
  176. // - RemainingTokenColumns is the original number of columns, 10;
  177. // - Split is (4, 2), denoting the two spaces between the two words;
  178. //
  179. // We compute the number of columns when the split is compressed into a single
  180. // space, like:
  181. // lala lala
  182. //
  183. // FIXME: Correctly measure the length of whitespace in Split.second so it
  184. // works with tabs.
  185. return RemainingTokenColumns + 1 - Split.second;
  186. }
  187. unsigned BreakableStringLiteral::getLineCount() const { return 1; }
  188. unsigned BreakableStringLiteral::getRangeLength(unsigned LineIndex,
  189. unsigned Offset,
  190. StringRef::size_type Length,
  191. unsigned StartColumn) const {
  192. llvm_unreachable("Getting the length of a part of the string literal "
  193. "indicates that the code tries to reflow it.");
  194. }
  195. unsigned
  196. BreakableStringLiteral::getRemainingLength(unsigned LineIndex, unsigned Offset,
  197. unsigned StartColumn) const {
  198. return UnbreakableTailLength + Postfix.size() +
  199. encoding::columnWidthWithTabs(Line.substr(Offset, StringRef::npos),
  200. StartColumn, Style.TabWidth, Encoding);
  201. }
  202. unsigned BreakableStringLiteral::getContentStartColumn(unsigned LineIndex,
  203. bool Break) const {
  204. return StartColumn + Prefix.size();
  205. }
  206. BreakableStringLiteral::BreakableStringLiteral(
  207. const FormatToken &Tok, unsigned StartColumn, StringRef Prefix,
  208. StringRef Postfix, unsigned UnbreakableTailLength, bool InPPDirective,
  209. encoding::Encoding Encoding, const FormatStyle &Style)
  210. : BreakableToken(Tok, InPPDirective, Encoding, Style),
  211. StartColumn(StartColumn), Prefix(Prefix), Postfix(Postfix),
  212. UnbreakableTailLength(UnbreakableTailLength) {
  213. assert(Tok.TokenText.startswith(Prefix) && Tok.TokenText.endswith(Postfix));
  214. Line = Tok.TokenText.substr(
  215. Prefix.size(), Tok.TokenText.size() - Prefix.size() - Postfix.size());
  216. }
  217. BreakableToken::Split BreakableStringLiteral::getSplit(
  218. unsigned LineIndex, unsigned TailOffset, unsigned ColumnLimit,
  219. unsigned ContentStartColumn, llvm::Regex &CommentPragmasRegex) const {
  220. return getStringSplit(Line.substr(TailOffset), ContentStartColumn,
  221. ColumnLimit - Postfix.size(), Style.TabWidth, Encoding);
  222. }
  223. void BreakableStringLiteral::insertBreak(unsigned LineIndex,
  224. unsigned TailOffset, Split Split,
  225. unsigned ContentIndent,
  226. WhitespaceManager &Whitespaces) const {
  227. Whitespaces.replaceWhitespaceInToken(
  228. Tok, Prefix.size() + TailOffset + Split.first, Split.second, Postfix,
  229. Prefix, InPPDirective, 1, StartColumn);
  230. }
  231. BreakableComment::BreakableComment(const FormatToken &Token,
  232. unsigned StartColumn, bool InPPDirective,
  233. encoding::Encoding Encoding,
  234. const FormatStyle &Style)
  235. : BreakableToken(Token, InPPDirective, Encoding, Style),
  236. StartColumn(StartColumn) {}
  237. unsigned BreakableComment::getLineCount() const { return Lines.size(); }
  238. BreakableToken::Split
  239. BreakableComment::getSplit(unsigned LineIndex, unsigned TailOffset,
  240. unsigned ColumnLimit, unsigned ContentStartColumn,
  241. llvm::Regex &CommentPragmasRegex) const {
  242. // Don't break lines matching the comment pragmas regex.
  243. if (CommentPragmasRegex.match(Content[LineIndex]))
  244. return Split(StringRef::npos, 0);
  245. return getCommentSplit(Content[LineIndex].substr(TailOffset),
  246. ContentStartColumn, ColumnLimit, Style.TabWidth,
  247. Encoding, Style);
  248. }
  249. void BreakableComment::compressWhitespace(
  250. unsigned LineIndex, unsigned TailOffset, Split Split,
  251. WhitespaceManager &Whitespaces) const {
  252. StringRef Text = Content[LineIndex].substr(TailOffset);
  253. // Text is relative to the content line, but Whitespaces operates relative to
  254. // the start of the corresponding token, so compute the start of the Split
  255. // that needs to be compressed into a single space relative to the start of
  256. // its token.
  257. unsigned BreakOffsetInToken =
  258. Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
  259. unsigned CharsToRemove = Split.second;
  260. Whitespaces.replaceWhitespaceInToken(
  261. tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "", "",
  262. /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
  263. }
  264. const FormatToken &BreakableComment::tokenAt(unsigned LineIndex) const {
  265. return Tokens[LineIndex] ? *Tokens[LineIndex] : Tok;
  266. }
  267. static bool mayReflowContent(StringRef Content) {
  268. Content = Content.trim(Blanks);
  269. // Lines starting with '@' commonly have special meaning.
  270. // Lines starting with '-', '-#', '+' or '*' are bulleted/numbered lists.
  271. bool hasSpecialMeaningPrefix = false;
  272. for (StringRef Prefix :
  273. {"@", "TODO", "FIXME", "XXX", "-# ", "- ", "+ ", "* "}) {
  274. if (Content.startswith(Prefix)) {
  275. hasSpecialMeaningPrefix = true;
  276. break;
  277. }
  278. }
  279. // Numbered lists may also start with a number followed by '.'
  280. // To avoid issues if a line starts with a number which is actually the end
  281. // of a previous line, we only consider numbers with up to 2 digits.
  282. static auto *const kNumberedListRegexp = new llvm::Regex("^[1-9][0-9]?\\. ");
  283. hasSpecialMeaningPrefix =
  284. hasSpecialMeaningPrefix || kNumberedListRegexp->match(Content);
  285. // Simple heuristic for what to reflow: content should contain at least two
  286. // characters and either the first or second character must be
  287. // non-punctuation.
  288. return Content.size() >= 2 && !hasSpecialMeaningPrefix &&
  289. !Content.endswith("\\") &&
  290. // Note that this is UTF-8 safe, since if isPunctuation(Content[0]) is
  291. // true, then the first code point must be 1 byte long.
  292. (!isPunctuation(Content[0]) || !isPunctuation(Content[1]));
  293. }
  294. BreakableBlockComment::BreakableBlockComment(
  295. const FormatToken &Token, unsigned StartColumn,
  296. unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
  297. encoding::Encoding Encoding, const FormatStyle &Style)
  298. : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style),
  299. DelimitersOnNewline(false),
  300. UnbreakableTailLength(Token.UnbreakableTailLength) {
  301. assert(Tok.is(TT_BlockComment) &&
  302. "block comment section must start with a block comment");
  303. StringRef TokenText(Tok.TokenText);
  304. assert(TokenText.startswith("/*") && TokenText.endswith("*/"));
  305. TokenText.substr(2, TokenText.size() - 4).split(Lines, "\n");
  306. int IndentDelta = StartColumn - OriginalStartColumn;
  307. Content.resize(Lines.size());
  308. Content[0] = Lines[0];
  309. ContentColumn.resize(Lines.size());
  310. // Account for the initial '/*'.
  311. ContentColumn[0] = StartColumn + 2;
  312. Tokens.resize(Lines.size());
  313. for (size_t i = 1; i < Lines.size(); ++i)
  314. adjustWhitespace(i, IndentDelta);
  315. // Align decorations with the column of the star on the first line,
  316. // that is one column after the start "/*".
  317. DecorationColumn = StartColumn + 1;
  318. // Account for comment decoration patterns like this:
  319. //
  320. // /*
  321. // ** blah blah blah
  322. // */
  323. if (Lines.size() >= 2 && Content[1].startswith("**") &&
  324. static_cast<unsigned>(ContentColumn[1]) == StartColumn) {
  325. DecorationColumn = StartColumn;
  326. }
  327. Decoration = "* ";
  328. if (Lines.size() == 1 && !FirstInLine) {
  329. // Comments for which FirstInLine is false can start on arbitrary column,
  330. // and available horizontal space can be too small to align consecutive
  331. // lines with the first one.
  332. // FIXME: We could, probably, align them to current indentation level, but
  333. // now we just wrap them without stars.
  334. Decoration = "";
  335. }
  336. for (size_t i = 1, e = Lines.size(); i < e && !Decoration.empty(); ++i) {
  337. // If the last line is empty, the closing "*/" will have a star.
  338. if (i + 1 == e && Content[i].empty())
  339. break;
  340. if (!Content[i].empty() && i + 1 != e && Decoration.startswith(Content[i]))
  341. continue;
  342. while (!Content[i].startswith(Decoration))
  343. Decoration = Decoration.substr(0, Decoration.size() - 1);
  344. }
  345. LastLineNeedsDecoration = true;
  346. IndentAtLineBreak = ContentColumn[0] + 1;
  347. for (size_t i = 1, e = Lines.size(); i < e; ++i) {
  348. if (Content[i].empty()) {
  349. if (i + 1 == e) {
  350. // Empty last line means that we already have a star as a part of the
  351. // trailing */. We also need to preserve whitespace, so that */ is
  352. // correctly indented.
  353. LastLineNeedsDecoration = false;
  354. // Align the star in the last '*/' with the stars on the previous lines.
  355. if (e >= 2 && !Decoration.empty()) {
  356. ContentColumn[i] = DecorationColumn;
  357. }
  358. } else if (Decoration.empty()) {
  359. // For all other lines, set the start column to 0 if they're empty, so
  360. // we do not insert trailing whitespace anywhere.
  361. ContentColumn[i] = 0;
  362. }
  363. continue;
  364. }
  365. // The first line already excludes the star.
  366. // The last line excludes the star if LastLineNeedsDecoration is false.
  367. // For all other lines, adjust the line to exclude the star and
  368. // (optionally) the first whitespace.
  369. unsigned DecorationSize = Decoration.startswith(Content[i])
  370. ? Content[i].size()
  371. : Decoration.size();
  372. if (DecorationSize) {
  373. ContentColumn[i] = DecorationColumn + DecorationSize;
  374. }
  375. Content[i] = Content[i].substr(DecorationSize);
  376. if (!Decoration.startswith(Content[i]))
  377. IndentAtLineBreak =
  378. std::min<int>(IndentAtLineBreak, std::max(0, ContentColumn[i]));
  379. }
  380. IndentAtLineBreak = std::max<unsigned>(IndentAtLineBreak, Decoration.size());
  381. // Detect a multiline jsdoc comment and set DelimitersOnNewline in that case.
  382. if (Style.Language == FormatStyle::LK_JavaScript ||
  383. Style.Language == FormatStyle::LK_Java) {
  384. if ((Lines[0] == "*" || Lines[0].startswith("* ")) && Lines.size() > 1) {
  385. // This is a multiline jsdoc comment.
  386. DelimitersOnNewline = true;
  387. } else if (Lines[0].startswith("* ") && Lines.size() == 1) {
  388. // Detect a long single-line comment, like:
  389. // /** long long long */
  390. // Below, '2' is the width of '*/'.
  391. unsigned EndColumn =
  392. ContentColumn[0] +
  393. encoding::columnWidthWithTabs(Lines[0], ContentColumn[0],
  394. Style.TabWidth, Encoding) +
  395. 2;
  396. DelimitersOnNewline = EndColumn > Style.ColumnLimit;
  397. }
  398. }
  399. LLVM_DEBUG({
  400. llvm::dbgs() << "IndentAtLineBreak " << IndentAtLineBreak << "\n";
  401. llvm::dbgs() << "DelimitersOnNewline " << DelimitersOnNewline << "\n";
  402. for (size_t i = 0; i < Lines.size(); ++i) {
  403. llvm::dbgs() << i << " |" << Content[i] << "| "
  404. << "CC=" << ContentColumn[i] << "| "
  405. << "IN=" << (Content[i].data() - Lines[i].data()) << "\n";
  406. }
  407. });
  408. }
  409. void BreakableBlockComment::adjustWhitespace(unsigned LineIndex,
  410. int IndentDelta) {
  411. // When in a preprocessor directive, the trailing backslash in a block comment
  412. // is not needed, but can serve a purpose of uniformity with necessary escaped
  413. // newlines outside the comment. In this case we remove it here before
  414. // trimming the trailing whitespace. The backslash will be re-added later when
  415. // inserting a line break.
  416. size_t EndOfPreviousLine = Lines[LineIndex - 1].size();
  417. if (InPPDirective && Lines[LineIndex - 1].endswith("\\"))
  418. --EndOfPreviousLine;
  419. // Calculate the end of the non-whitespace text in the previous line.
  420. EndOfPreviousLine =
  421. Lines[LineIndex - 1].find_last_not_of(Blanks, EndOfPreviousLine);
  422. if (EndOfPreviousLine == StringRef::npos)
  423. EndOfPreviousLine = 0;
  424. else
  425. ++EndOfPreviousLine;
  426. // Calculate the start of the non-whitespace text in the current line.
  427. size_t StartOfLine = Lines[LineIndex].find_first_not_of(Blanks);
  428. if (StartOfLine == StringRef::npos)
  429. StartOfLine = Lines[LineIndex].rtrim("\r\n").size();
  430. StringRef Whitespace = Lines[LineIndex].substr(0, StartOfLine);
  431. // Adjust Lines to only contain relevant text.
  432. size_t PreviousContentOffset =
  433. Content[LineIndex - 1].data() - Lines[LineIndex - 1].data();
  434. Content[LineIndex - 1] = Lines[LineIndex - 1].substr(
  435. PreviousContentOffset, EndOfPreviousLine - PreviousContentOffset);
  436. Content[LineIndex] = Lines[LineIndex].substr(StartOfLine);
  437. // Adjust the start column uniformly across all lines.
  438. ContentColumn[LineIndex] =
  439. encoding::columnWidthWithTabs(Whitespace, 0, Style.TabWidth, Encoding) +
  440. IndentDelta;
  441. }
  442. unsigned BreakableBlockComment::getRangeLength(unsigned LineIndex,
  443. unsigned Offset,
  444. StringRef::size_type Length,
  445. unsigned StartColumn) const {
  446. unsigned LineLength =
  447. encoding::columnWidthWithTabs(Content[LineIndex].substr(Offset, Length),
  448. StartColumn, Style.TabWidth, Encoding);
  449. // FIXME: This should go into getRemainingLength instead, but we currently
  450. // break tests when putting it there. Investigate how to fix those tests.
  451. // The last line gets a "*/" postfix.
  452. if (LineIndex + 1 == Lines.size()) {
  453. LineLength += 2;
  454. // We never need a decoration when breaking just the trailing "*/" postfix.
  455. // Note that checking that Length == 0 is not enough, since Length could
  456. // also be StringRef::npos.
  457. if (Content[LineIndex].substr(Offset, StringRef::npos).empty()) {
  458. LineLength -= Decoration.size();
  459. }
  460. }
  461. return LineLength;
  462. }
  463. unsigned BreakableBlockComment::getRemainingLength(unsigned LineIndex,
  464. unsigned Offset,
  465. unsigned StartColumn) const {
  466. return UnbreakableTailLength +
  467. getRangeLength(LineIndex, Offset, StringRef::npos, StartColumn);
  468. }
  469. unsigned BreakableBlockComment::getContentStartColumn(unsigned LineIndex,
  470. bool Break) const {
  471. if (Break)
  472. return IndentAtLineBreak;
  473. return std::max(0, ContentColumn[LineIndex]);
  474. }
  475. const llvm::StringSet<>
  476. BreakableBlockComment::ContentIndentingJavadocAnnotations = {
  477. "@param", "@return", "@returns", "@throws", "@type", "@template",
  478. "@see", "@deprecated", "@define", "@exports", "@mods", "@private",
  479. };
  480. unsigned BreakableBlockComment::getContentIndent(unsigned LineIndex) const {
  481. if (Style.Language != FormatStyle::LK_Java &&
  482. Style.Language != FormatStyle::LK_JavaScript)
  483. return 0;
  484. // The content at LineIndex 0 of a comment like:
  485. // /** line 0 */
  486. // is "* line 0", so we need to skip over the decoration in that case.
  487. StringRef ContentWithNoDecoration = Content[LineIndex];
  488. if (LineIndex == 0 && ContentWithNoDecoration.startswith("*")) {
  489. ContentWithNoDecoration = ContentWithNoDecoration.substr(1).ltrim(Blanks);
  490. }
  491. StringRef FirstWord = ContentWithNoDecoration.substr(
  492. 0, ContentWithNoDecoration.find_first_of(Blanks));
  493. if (ContentIndentingJavadocAnnotations.find(FirstWord) !=
  494. ContentIndentingJavadocAnnotations.end())
  495. return Style.ContinuationIndentWidth;
  496. return 0;
  497. }
  498. void BreakableBlockComment::insertBreak(unsigned LineIndex, unsigned TailOffset,
  499. Split Split, unsigned ContentIndent,
  500. WhitespaceManager &Whitespaces) const {
  501. StringRef Text = Content[LineIndex].substr(TailOffset);
  502. StringRef Prefix = Decoration;
  503. // We need this to account for the case when we have a decoration "* " for all
  504. // the lines except for the last one, where the star in "*/" acts as a
  505. // decoration.
  506. unsigned LocalIndentAtLineBreak = IndentAtLineBreak;
  507. if (LineIndex + 1 == Lines.size() &&
  508. Text.size() == Split.first + Split.second) {
  509. // For the last line we need to break before "*/", but not to add "* ".
  510. Prefix = "";
  511. if (LocalIndentAtLineBreak >= 2)
  512. LocalIndentAtLineBreak -= 2;
  513. }
  514. // The split offset is from the beginning of the line. Convert it to an offset
  515. // from the beginning of the token text.
  516. unsigned BreakOffsetInToken =
  517. Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
  518. unsigned CharsToRemove = Split.second;
  519. assert(LocalIndentAtLineBreak >= Prefix.size());
  520. std::string PrefixWithTrailingIndent = Prefix;
  521. for (unsigned I = 0; I < ContentIndent; ++I)
  522. PrefixWithTrailingIndent += " ";
  523. Whitespaces.replaceWhitespaceInToken(
  524. tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
  525. PrefixWithTrailingIndent, InPPDirective, /*Newlines=*/1,
  526. /*Spaces=*/LocalIndentAtLineBreak + ContentIndent -
  527. PrefixWithTrailingIndent.size());
  528. }
  529. BreakableToken::Split
  530. BreakableBlockComment::getReflowSplit(unsigned LineIndex,
  531. llvm::Regex &CommentPragmasRegex) const {
  532. if (!mayReflow(LineIndex, CommentPragmasRegex))
  533. return Split(StringRef::npos, 0);
  534. // If we're reflowing into a line with content indent, only reflow the next
  535. // line if its starting whitespace matches the content indent.
  536. size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
  537. if (LineIndex) {
  538. unsigned PreviousContentIndent = getContentIndent(LineIndex - 1);
  539. if (PreviousContentIndent && Trimmed != StringRef::npos &&
  540. Trimmed != PreviousContentIndent)
  541. return Split(StringRef::npos, 0);
  542. }
  543. return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
  544. }
  545. bool BreakableBlockComment::introducesBreakBeforeToken() const {
  546. // A break is introduced when we want delimiters on newline.
  547. return DelimitersOnNewline &&
  548. Lines[0].substr(1).find_first_not_of(Blanks) != StringRef::npos;
  549. }
  550. void BreakableBlockComment::reflow(unsigned LineIndex,
  551. WhitespaceManager &Whitespaces) const {
  552. StringRef TrimmedContent = Content[LineIndex].ltrim(Blanks);
  553. // Here we need to reflow.
  554. assert(Tokens[LineIndex - 1] == Tokens[LineIndex] &&
  555. "Reflowing whitespace within a token");
  556. // This is the offset of the end of the last line relative to the start of
  557. // the token text in the token.
  558. unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
  559. Content[LineIndex - 1].size() -
  560. tokenAt(LineIndex).TokenText.data();
  561. unsigned WhitespaceLength = TrimmedContent.data() -
  562. tokenAt(LineIndex).TokenText.data() -
  563. WhitespaceOffsetInToken;
  564. Whitespaces.replaceWhitespaceInToken(
  565. tokenAt(LineIndex), WhitespaceOffsetInToken,
  566. /*ReplaceChars=*/WhitespaceLength, /*PreviousPostfix=*/"",
  567. /*CurrentPrefix=*/ReflowPrefix, InPPDirective, /*Newlines=*/0,
  568. /*Spaces=*/0);
  569. }
  570. void BreakableBlockComment::adaptStartOfLine(
  571. unsigned LineIndex, WhitespaceManager &Whitespaces) const {
  572. if (LineIndex == 0) {
  573. if (DelimitersOnNewline) {
  574. // Since we're breaking at index 1 below, the break position and the
  575. // break length are the same.
  576. // Note: this works because getCommentSplit is careful never to split at
  577. // the beginning of a line.
  578. size_t BreakLength = Lines[0].substr(1).find_first_not_of(Blanks);
  579. if (BreakLength != StringRef::npos)
  580. insertBreak(LineIndex, 0, Split(1, BreakLength), /*ContentIndent=*/0,
  581. Whitespaces);
  582. }
  583. return;
  584. }
  585. // Here no reflow with the previous line will happen.
  586. // Fix the decoration of the line at LineIndex.
  587. StringRef Prefix = Decoration;
  588. if (Content[LineIndex].empty()) {
  589. if (LineIndex + 1 == Lines.size()) {
  590. if (!LastLineNeedsDecoration) {
  591. // If the last line was empty, we don't need a prefix, as the */ will
  592. // line up with the decoration (if it exists).
  593. Prefix = "";
  594. }
  595. } else if (!Decoration.empty()) {
  596. // For other empty lines, if we do have a decoration, adapt it to not
  597. // contain a trailing whitespace.
  598. Prefix = Prefix.substr(0, 1);
  599. }
  600. } else {
  601. if (ContentColumn[LineIndex] == 1) {
  602. // This line starts immediately after the decorating *.
  603. Prefix = Prefix.substr(0, 1);
  604. }
  605. }
  606. // This is the offset of the end of the last line relative to the start of the
  607. // token text in the token.
  608. unsigned WhitespaceOffsetInToken = Content[LineIndex - 1].data() +
  609. Content[LineIndex - 1].size() -
  610. tokenAt(LineIndex).TokenText.data();
  611. unsigned WhitespaceLength = Content[LineIndex].data() -
  612. tokenAt(LineIndex).TokenText.data() -
  613. WhitespaceOffsetInToken;
  614. Whitespaces.replaceWhitespaceInToken(
  615. tokenAt(LineIndex), WhitespaceOffsetInToken, WhitespaceLength, "", Prefix,
  616. InPPDirective, /*Newlines=*/1, ContentColumn[LineIndex] - Prefix.size());
  617. }
  618. BreakableToken::Split
  619. BreakableBlockComment::getSplitAfterLastLine(unsigned TailOffset) const {
  620. if (DelimitersOnNewline) {
  621. // Replace the trailing whitespace of the last line with a newline.
  622. // In case the last line is empty, the ending '*/' is already on its own
  623. // line.
  624. StringRef Line = Content.back().substr(TailOffset);
  625. StringRef TrimmedLine = Line.rtrim(Blanks);
  626. if (!TrimmedLine.empty())
  627. return Split(TrimmedLine.size(), Line.size() - TrimmedLine.size());
  628. }
  629. return Split(StringRef::npos, 0);
  630. }
  631. bool BreakableBlockComment::mayReflow(unsigned LineIndex,
  632. llvm::Regex &CommentPragmasRegex) const {
  633. // Content[LineIndex] may exclude the indent after the '*' decoration. In that
  634. // case, we compute the start of the comment pragma manually.
  635. StringRef IndentContent = Content[LineIndex];
  636. if (Lines[LineIndex].ltrim(Blanks).startswith("*")) {
  637. IndentContent = Lines[LineIndex].ltrim(Blanks).substr(1);
  638. }
  639. return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
  640. mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
  641. !switchesFormatting(tokenAt(LineIndex));
  642. }
  643. BreakableLineCommentSection::BreakableLineCommentSection(
  644. const FormatToken &Token, unsigned StartColumn,
  645. unsigned OriginalStartColumn, bool FirstInLine, bool InPPDirective,
  646. encoding::Encoding Encoding, const FormatStyle &Style)
  647. : BreakableComment(Token, StartColumn, InPPDirective, Encoding, Style) {
  648. assert(Tok.is(TT_LineComment) &&
  649. "line comment section must start with a line comment");
  650. FormatToken *LineTok = nullptr;
  651. for (const FormatToken *CurrentTok = &Tok;
  652. CurrentTok && CurrentTok->is(TT_LineComment);
  653. CurrentTok = CurrentTok->Next) {
  654. LastLineTok = LineTok;
  655. StringRef TokenText(CurrentTok->TokenText);
  656. assert((TokenText.startswith("//") || TokenText.startswith("#")) &&
  657. "unsupported line comment prefix, '//' and '#' are supported");
  658. size_t FirstLineIndex = Lines.size();
  659. TokenText.split(Lines, "\n");
  660. Content.resize(Lines.size());
  661. ContentColumn.resize(Lines.size());
  662. OriginalContentColumn.resize(Lines.size());
  663. Tokens.resize(Lines.size());
  664. Prefix.resize(Lines.size());
  665. OriginalPrefix.resize(Lines.size());
  666. for (size_t i = FirstLineIndex, e = Lines.size(); i < e; ++i) {
  667. Lines[i] = Lines[i].ltrim(Blanks);
  668. // We need to trim the blanks in case this is not the first line in a
  669. // multiline comment. Then the indent is included in Lines[i].
  670. StringRef IndentPrefix =
  671. getLineCommentIndentPrefix(Lines[i].ltrim(Blanks), Style);
  672. assert((TokenText.startswith("//") || TokenText.startswith("#")) &&
  673. "unsupported line comment prefix, '//' and '#' are supported");
  674. OriginalPrefix[i] = Prefix[i] = IndentPrefix;
  675. if (Lines[i].size() > Prefix[i].size() &&
  676. isAlphanumeric(Lines[i][Prefix[i].size()])) {
  677. if (Prefix[i] == "//")
  678. Prefix[i] = "// ";
  679. else if (Prefix[i] == "///")
  680. Prefix[i] = "/// ";
  681. else if (Prefix[i] == "//!")
  682. Prefix[i] = "//! ";
  683. else if (Prefix[i] == "///<")
  684. Prefix[i] = "///< ";
  685. else if (Prefix[i] == "//!<")
  686. Prefix[i] = "//!< ";
  687. else if (Prefix[i] == "#" &&
  688. Style.Language == FormatStyle::LK_TextProto)
  689. Prefix[i] = "# ";
  690. }
  691. Tokens[i] = LineTok;
  692. Content[i] = Lines[i].substr(IndentPrefix.size());
  693. OriginalContentColumn[i] =
  694. StartColumn + encoding::columnWidthWithTabs(OriginalPrefix[i],
  695. StartColumn,
  696. Style.TabWidth, Encoding);
  697. ContentColumn[i] =
  698. StartColumn + encoding::columnWidthWithTabs(Prefix[i], StartColumn,
  699. Style.TabWidth, Encoding);
  700. // Calculate the end of the non-whitespace text in this line.
  701. size_t EndOfLine = Content[i].find_last_not_of(Blanks);
  702. if (EndOfLine == StringRef::npos)
  703. EndOfLine = Content[i].size();
  704. else
  705. ++EndOfLine;
  706. Content[i] = Content[i].substr(0, EndOfLine);
  707. }
  708. LineTok = CurrentTok->Next;
  709. if (CurrentTok->Next && !CurrentTok->Next->ContinuesLineCommentSection) {
  710. // A line comment section needs to broken by a line comment that is
  711. // preceded by at least two newlines. Note that we put this break here
  712. // instead of breaking at a previous stage during parsing, since that
  713. // would split the contents of the enum into two unwrapped lines in this
  714. // example, which is undesirable:
  715. // enum A {
  716. // a, // comment about a
  717. //
  718. // // comment about b
  719. // b
  720. // };
  721. //
  722. // FIXME: Consider putting separate line comment sections as children to
  723. // the unwrapped line instead.
  724. break;
  725. }
  726. }
  727. }
  728. unsigned
  729. BreakableLineCommentSection::getRangeLength(unsigned LineIndex, unsigned Offset,
  730. StringRef::size_type Length,
  731. unsigned StartColumn) const {
  732. return encoding::columnWidthWithTabs(
  733. Content[LineIndex].substr(Offset, Length), StartColumn, Style.TabWidth,
  734. Encoding);
  735. }
  736. unsigned BreakableLineCommentSection::getContentStartColumn(unsigned LineIndex,
  737. bool Break) const {
  738. if (Break)
  739. return OriginalContentColumn[LineIndex];
  740. return ContentColumn[LineIndex];
  741. }
  742. void BreakableLineCommentSection::insertBreak(
  743. unsigned LineIndex, unsigned TailOffset, Split Split,
  744. unsigned ContentIndent, WhitespaceManager &Whitespaces) const {
  745. StringRef Text = Content[LineIndex].substr(TailOffset);
  746. // Compute the offset of the split relative to the beginning of the token
  747. // text.
  748. unsigned BreakOffsetInToken =
  749. Text.data() - tokenAt(LineIndex).TokenText.data() + Split.first;
  750. unsigned CharsToRemove = Split.second;
  751. // Compute the size of the new indent, including the size of the new prefix of
  752. // the newly broken line.
  753. unsigned IndentAtLineBreak = OriginalContentColumn[LineIndex] +
  754. Prefix[LineIndex].size() -
  755. OriginalPrefix[LineIndex].size();
  756. assert(IndentAtLineBreak >= Prefix[LineIndex].size());
  757. Whitespaces.replaceWhitespaceInToken(
  758. tokenAt(LineIndex), BreakOffsetInToken, CharsToRemove, "",
  759. Prefix[LineIndex], InPPDirective, /*Newlines=*/1,
  760. /*Spaces=*/IndentAtLineBreak - Prefix[LineIndex].size());
  761. }
  762. BreakableComment::Split BreakableLineCommentSection::getReflowSplit(
  763. unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const {
  764. if (!mayReflow(LineIndex, CommentPragmasRegex))
  765. return Split(StringRef::npos, 0);
  766. size_t Trimmed = Content[LineIndex].find_first_not_of(Blanks);
  767. // In a line comment section each line is a separate token; thus, after a
  768. // split we replace all whitespace before the current line comment token
  769. // (which does not need to be included in the split), plus the start of the
  770. // line up to where the content starts.
  771. return Split(0, Trimmed != StringRef::npos ? Trimmed : 0);
  772. }
  773. void BreakableLineCommentSection::reflow(unsigned LineIndex,
  774. WhitespaceManager &Whitespaces) const {
  775. if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
  776. // Reflow happens between tokens. Replace the whitespace between the
  777. // tokens by the empty string.
  778. Whitespaces.replaceWhitespace(
  779. *Tokens[LineIndex], /*Newlines=*/0, /*Spaces=*/0,
  780. /*StartOfTokenColumn=*/StartColumn, /*InPPDirective=*/false);
  781. } else if (LineIndex > 0) {
  782. // In case we're reflowing after the '\' in:
  783. //
  784. // // line comment \
  785. // // line 2
  786. //
  787. // the reflow happens inside the single comment token (it is a single line
  788. // comment with an unescaped newline).
  789. // Replace the whitespace between the '\' and '//' with the empty string.
  790. //
  791. // Offset points to after the '\' relative to start of the token.
  792. unsigned Offset = Lines[LineIndex - 1].data() +
  793. Lines[LineIndex - 1].size() -
  794. tokenAt(LineIndex - 1).TokenText.data();
  795. // WhitespaceLength is the number of chars between the '\' and the '//' on
  796. // the next line.
  797. unsigned WhitespaceLength =
  798. Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data() - Offset;
  799. Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,
  800. /*ReplaceChars=*/WhitespaceLength,
  801. /*PreviousPostfix=*/"",
  802. /*CurrentPrefix=*/"",
  803. /*InPPDirective=*/false,
  804. /*Newlines=*/0,
  805. /*Spaces=*/0);
  806. }
  807. // Replace the indent and prefix of the token with the reflow prefix.
  808. unsigned Offset =
  809. Lines[LineIndex].data() - tokenAt(LineIndex).TokenText.data();
  810. unsigned WhitespaceLength =
  811. Content[LineIndex].data() - Lines[LineIndex].data();
  812. Whitespaces.replaceWhitespaceInToken(*Tokens[LineIndex], Offset,
  813. /*ReplaceChars=*/WhitespaceLength,
  814. /*PreviousPostfix=*/"",
  815. /*CurrentPrefix=*/ReflowPrefix,
  816. /*InPPDirective=*/false,
  817. /*Newlines=*/0,
  818. /*Spaces=*/0);
  819. }
  820. void BreakableLineCommentSection::adaptStartOfLine(
  821. unsigned LineIndex, WhitespaceManager &Whitespaces) const {
  822. // If this is the first line of a token, we need to inform Whitespace Manager
  823. // about it: either adapt the whitespace range preceding it, or mark it as an
  824. // untouchable token.
  825. // This happens for instance here:
  826. // // line 1 \
  827. // // line 2
  828. if (LineIndex > 0 && Tokens[LineIndex] != Tokens[LineIndex - 1]) {
  829. // This is the first line for the current token, but no reflow with the
  830. // previous token is necessary. However, we still may need to adjust the
  831. // start column. Note that ContentColumn[LineIndex] is the expected
  832. // content column after a possible update to the prefix, hence the prefix
  833. // length change is included.
  834. unsigned LineColumn =
  835. ContentColumn[LineIndex] -
  836. (Content[LineIndex].data() - Lines[LineIndex].data()) +
  837. (OriginalPrefix[LineIndex].size() - Prefix[LineIndex].size());
  838. // We always want to create a replacement instead of adding an untouchable
  839. // token, even if LineColumn is the same as the original column of the
  840. // token. This is because WhitespaceManager doesn't align trailing
  841. // comments if they are untouchable.
  842. Whitespaces.replaceWhitespace(*Tokens[LineIndex],
  843. /*Newlines=*/1,
  844. /*Spaces=*/LineColumn,
  845. /*StartOfTokenColumn=*/LineColumn,
  846. /*InPPDirective=*/false);
  847. }
  848. if (OriginalPrefix[LineIndex] != Prefix[LineIndex]) {
  849. // Adjust the prefix if necessary.
  850. // Take care of the space possibly introduced after a decoration.
  851. assert(Prefix[LineIndex] == (OriginalPrefix[LineIndex] + " ").str() &&
  852. "Expecting a line comment prefix to differ from original by at most "
  853. "a space");
  854. Whitespaces.replaceWhitespaceInToken(
  855. tokenAt(LineIndex), OriginalPrefix[LineIndex].size(), 0, "", "",
  856. /*InPPDirective=*/false, /*Newlines=*/0, /*Spaces=*/1);
  857. }
  858. }
  859. void BreakableLineCommentSection::updateNextToken(LineState &State) const {
  860. if (LastLineTok) {
  861. State.NextToken = LastLineTok->Next;
  862. }
  863. }
  864. bool BreakableLineCommentSection::mayReflow(
  865. unsigned LineIndex, llvm::Regex &CommentPragmasRegex) const {
  866. // Line comments have the indent as part of the prefix, so we need to
  867. // recompute the start of the line.
  868. StringRef IndentContent = Content[LineIndex];
  869. if (Lines[LineIndex].startswith("//")) {
  870. IndentContent = Lines[LineIndex].substr(2);
  871. }
  872. // FIXME: Decide whether we want to reflow non-regular indents:
  873. // Currently, we only reflow when the OriginalPrefix[LineIndex] matches the
  874. // OriginalPrefix[LineIndex-1]. That means we don't reflow
  875. // // text that protrudes
  876. // // into text with different indent
  877. // We do reflow in that case in block comments.
  878. return LineIndex > 0 && !CommentPragmasRegex.match(IndentContent) &&
  879. mayReflowContent(Content[LineIndex]) && !Tok.Finalized &&
  880. !switchesFormatting(tokenAt(LineIndex)) &&
  881. OriginalPrefix[LineIndex] == OriginalPrefix[LineIndex - 1];
  882. }
  883. } // namespace format
  884. } // namespace clang